Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote:
> gem_concurrent_blit tries to ensure that it doesn't try and run a test
> that would grind the system to a halt, i.e. unexpectedly cause swap
> thrashing. It currently calls intel_require_memory(), but outside of
> the subtest (as the tests use fork, it cannot do requirement testing
> within the test children) - but intel_require_memory() calls
> igt_require() and triggers an abort. Wrapping that initial require
> within an igt_fixture() stops the abort(), but also prevents any further
> testing.
> 
> This patch restructures the requirement checking to ordinary conditions,
> which, though allowing the test to run, also prevent listing of subtests
> on machines which cannot handle them.


> ---
>  lib/igt_aux.h  |  2 ++
>  lib/intel_os.c | 53 +++-
>  tests/gem_concurrent_all.c | 67 +-
>  3 files changed, 85 insertions(+), 37 deletions(-)
> 
> diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> index 6e11ee6..5a88c2a 100644
> --- a/lib/igt_aux.h
> +++ b/lib/igt_aux.h
> @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void);
>  
>  #define CHECK_RAM 0x1
>  #define CHECK_SWAP 0x2
> +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode,
> +  uint64_t *out_required, uint64_t *out_total);
>  void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
>  int intel_num_objects_for_memory(uint32_t size, unsigned mode);
>  
> diff --git a/lib/intel_os.c b/lib/intel_os.c
> index dba9e17..90f9bb3 100644
> --- a/lib/intel_os.c
> +++ b/lib/intel_os.c
> @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void)
>   return retval / (1024*1024);
>  }
>  

Please add the usual gtkdoc boilerplate here with a mention of
intel_check_memory. Ack with that.
-Daniel
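
[Editor's note: a sketch of what that gtkdoc block could look like, modelled
on the existing intel_require_memory() documentation quoted below; the exact
wording is of course up to the author:]

    /**
     * __intel_check_memory:
     * @count: number of surfaces that will be created
     * @size: the size in bytes of each surface
     * @mode: bitfield of CHECK_RAM / CHECK_SWAP declaring where the test runs
     * @out_required: if non-NULL, returns the estimated number of bytes needed
     * @out_total: if non-NULL, returns the number of bytes available
     *
     * Variant of intel_require_memory() that reports, rather than enforces,
     * whether the estimated upper bound on the memory required by the test
     * fits into the available memory. Unlike intel_require_memory() it never
     * calls igt_require(), so it is safe to use outside of subtests and
     * fixtures.
     *
     * Returns: non-zero if the estimated requirement fits, zero otherwise.
     */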

> +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode,
> +  uint64_t *out_required, uint64_t *out_total)
> +{
> +/* rough estimate of how many bytes the kernel requires to track each object */
> +#define KERNEL_BO_OVERHEAD 512
> + uint64_t required, total;
> +
> + required = count;
> + required *= size + KERNEL_BO_OVERHEAD;
> + required = ALIGN(required, 4096);
> +
> + igt_debug("Checking %'u surfaces of size %'llu bytes (total %'llu) 
> against %s%s\n",
> +   count, (long long)size, (long long)required,
> +   mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
> +   mode & CHECK_SWAP ? " + swap": "");
> +
> + total = 0;
> + if (mode & (CHECK_RAM | CHECK_SWAP))
> + total += intel_get_avail_ram_mb();
> + if (mode & CHECK_SWAP)
> + total += intel_get_total_swap_mb();
> + total *= 1024 * 1024;
> +
> + if (out_required)
> + *out_required = required;
> +
> + if (out_total)
> + *out_total = total;
> +
> + return required < total;
> +}
> +
>  /**
>   * intel_require_memory:
>   * @count: number of surfaces that will be created
> @@ -217,27 +249,10 @@ intel_get_total_swap_mb(void)
>   */
>  void intel_require_memory(uint32_t count, uint64_t size, unsigned mode)
>  {
> -/* rough estimate of how many bytes the kernel requires to track each object */
> -#define KERNEL_BO_OVERHEAD 512
>   uint64_t required, total;
>  
> - required = count;
> - required *= size + KERNEL_BO_OVERHEAD;
> - required = ALIGN(required, 4096);
> -
> - igt_debug("Checking %'u surfaces of size %'llu bytes (total %'llu) 
> against %s%s\n",
> -   count, (long long)size, (long long)required,
> -   mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
> -   mode & CHECK_SWAP ? " + swap": "");
> -
> - total = 0;
> - if (mode & (CHECK_RAM | CHECK_SWAP))
> - total += intel_get_avail_ram_mb();
> - if (mode & CHECK_SWAP)
> - total += intel_get_total_swap_mb();
> - total *= 1024 * 1024;
> -
> - igt_skip_on_f(total <= required,
> + igt_skip_on_f(!__intel_check_memory(count, size, mode,
> + &required, &total),
> "Estimated that we need %'llu bytes for the test, but 
> only have %'llu bytes available (%s%s)\n",
> (long long)required, (long long)total,
> mode & (CHECK_RAM | CHECK_SWAP) ? "RAM" : "",
> diff --git a/tests/gem_concurrent_all.c b/tests/gem_concurrent_all.c
> index 0e873c4..9a2fb6d 100644
> --- a/tests/gem_concurrent_all.c
> +++ b/tests/gem_concurrent_all.c
> @@ -155,9 +155,9 @@ static bool can_create_stolen(void)
>  static drm_intel_bo *
>  (*create_func)(drm_intel_bufmgr *bufmgr, uint64_t size);
>  
> -static void create_cpu_require(void)
> +static bool create_cpu_require(void)
>  {
> - igt_require(create_func != create_stolen_bo);
> + return create_func != create_stolen_bo;
>  }
>  
>  static drm_intel_bo *
> @@ -375,7 +375,7 @@ gpu_

Re: [Intel-gfx] [PATCH 01/13] drm/i915/bdw+: Replace list_del+list_add_tail with list_move_tail

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:40AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Same effect for slightly less source code and resulting binary.
> 
> Signed-off-by: Tvrtko Ursulin 

Reviewed-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 15 ++-
>  1 file changed, 6 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 23839ff04e27..8b6071fcd743 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -431,9 +431,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
>   /* Same ctx: ignore first request, as second request
>* will update tail past first request's workload */
>   cursor->elsp_submitted = req0->elsp_submitted;
> - list_del(&req0->execlist_link);
> - list_add_tail(&req0->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&req0->execlist_link,
> +&ring->execlist_retired_req_list);
>   req0 = cursor;
>   } else {
>   req1 = cursor;
> @@ -485,9 +484,8 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>"Never submitted head request\n");
>  
>   if (--head_req->elsp_submitted <= 0) {
> - list_del(&head_req->execlist_link);
> - list_add_tail(&head_req->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&head_req->execlist_link,
> +  &ring->execlist_retired_req_list);
>   return true;

Aside: Some of this code is over-indented ...
-Daniel

>   }
>   }
> @@ -608,9 +606,8 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
>   if (request->ctx == tail_req->ctx) {
>   WARN(tail_req->elsp_submitted != 0,
>   "More than 2 already-submitted reqs queued\n");
> - list_del(&tail_req->execlist_link);
> - list_add_tail(&tail_req->execlist_link,
> - &ring->execlist_retired_req_list);
> + list_move_tail(&tail_req->execlist_link,
> +&ring->execlist_retired_req_list);
>   }
>   }
>  
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 02/13] drm/i915: Don't need a timer to wake us up

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:41AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Looks like the sleeping loop in __i915_wait_request can be
> simplified by using io_schedule_timeout instead of setting
> up and destroying a timer.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Chris Wilson 

io_schedule_timeout was only added in

commit 9cff8adeaa34b5d2802f03f89803da57856b3b72
Author: NeilBrown 
Date:   Fri Feb 13 15:49:17 2015 +1100

sched: Prevent recursion in io_schedule()

(well the EXPORT_SYMBOL for it), that was iirc why this was open-coded.
Please add this to your commit message.

Reviewed-by: Daniel Vetter 

> ---
>  drivers/gpu/drm/i915/i915_gem.c | 28 
>  1 file changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6c60e04fc09c..de98dc41fb9f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1135,11 +1135,6 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   return 0;
>  }
>  
> -static void fake_irq(unsigned long data)
> -{
> - wake_up_process((struct task_struct *)data);
> -}
> -
>  static bool missed_irq(struct drm_i915_private *dev_priv,
>  struct intel_engine_cs *ring)
>  {
> @@ -1291,7 +1286,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   }
>  
>   for (;;) {
> - struct timer_list timer;
> + long sched_timeout;
>  
>   prepare_to_wait(&ring->irq_queue, &wait, state);
>  
> @@ -1321,21 +1316,14 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   break;
>   }
>  
> - timer.function = NULL;
> - if (timeout || missed_irq(dev_priv, ring)) {
> - unsigned long expire;
> -
> - setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
> - expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
> - mod_timer(&timer, expire);
> - }
> -
> - io_schedule();
> + if (timeout)
> + sched_timeout = timeout_expire - jiffies;
> + else if (missed_irq(dev_priv, ring))
> + sched_timeout = 1;
> + else
> + sched_timeout = MAX_SCHEDULE_TIMEOUT;
>  
> - if (timer.function) {
> - del_singleshot_timer_sync(&timer);
> - destroy_timer_on_stack(&timer);
> - }
> + io_schedule_timeout(sched_timeout);
>   }
>   if (!irq_test_in_progress)
>   ring->irq_put(ring);
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 03/13] drm/i915: Avoid invariant conditionals in lrc interrupt handler

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:42AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> There is no need to check on what Gen we are running on every
> interrupt and every command submission. We can instead set up
> some of that when engines are initialized, store it in the
> engine structure and use it directly at runtime.
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 36 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
>  2 files changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8b6071fcd743..84977a6e6f3f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -298,29 +298,15 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>struct intel_engine_cs *ring)
>  {
>   struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
> - uint64_t desc;
> + uint64_t desc = ring->ctx_desc_template;
>   uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
>   LRC_PPHWSP_PN * PAGE_SIZE;
>  
>   WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>  
> - desc = GEN8_CTX_VALID;
> - desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> - if (IS_GEN8(ctx_obj->base.dev))
> - desc |= GEN8_CTX_L3LLC_COHERENT;
> - desc |= GEN8_CTX_PRIVILEGE;
>   desc |= lrca;
>   desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
>  
> - /* TODO: WaDisableLiteRestore when we start using semaphore
> -  * signalling between Command Streamers */
> - /* desc |= GEN8_CTX_FORCE_RESTORE; */
> -
> - /* WaEnableForceRestoreInCtxtDescForVCS:skl */
> - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */
> - if (disable_lite_restore_wa(ring))
> - desc |= GEN8_CTX_FORCE_RESTORE;
> -
>   return desc;
>  }

tbh I'd go full monty and just cache the entire context descriptor.
-Daniel
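
[Editor's note: a rough sketch of the "full monty" being suggested here,
i.e. assembling the whole descriptor once at context pin time so that the
hot path is a plain load. The lrc_desc field and the helper name are
hypothetical, purely for illustration:]

    static void lrc_update_cached_descriptor(struct intel_context *ctx,
                                             struct intel_engine_cs *ring)
    {
            struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
            u64 desc = ring->ctx_desc_template;     /* invariant bits, set at init */

            /* The lrca cannot change while the context is pinned. */
            desc |= i915_gem_obj_ggtt_offset(ctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE;
            desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;

            ctx->engine[ring->id].lrc_desc = desc;  /* hypothetical cache field */
    }

    uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
                                         struct intel_engine_cs *ring)
    {
            return ctx->engine[ring->id].lrc_desc;  /* submission: just a load */
    }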

>  
> @@ -556,7 +542,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
>   }
>   }
>  
> - if (disable_lite_restore_wa(ring)) {
> + if (ring->disable_lite_restore_wa) {
>   /* Prevent a ctx to preempt itself */
>   if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
>   (submit_contexts != 0))
> @@ -1980,6 +1966,24 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
>   goto error;
>   }
>  
> + ring->disable_lite_restore_wa = disable_lite_restore_wa(ring);
> +
> + ring->ctx_desc_template = GEN8_CTX_VALID;
> + ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) <<
> +GEN8_CTX_ADDRESSING_MODE_SHIFT;
> + if (IS_GEN8(dev))
> + ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
> + ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
> +
> + /* TODO: WaDisableLiteRestore when we start using semaphore
> +  * signalling between Command Streamers */
> + /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */
> +
> + /* WaEnableForceRestoreInCtxtDescForVCS:skl */
> + /* WaEnableForceRestoreInCtxtDescForVCS:bxt */
> + if (ring->disable_lite_restore_wa)
> + ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
> +
>   return 0;
>  
>  error:
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 49574ffe54bc..0b91a4b77359 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -268,6 +268,8 @@ struct  intel_engine_cs {
>   struct list_head execlist_queue;
>   struct list_head execlist_retired_req_list;
>   u8 next_context_status_buffer;
> + bool disable_lite_restore_wa;
> + u32 ctx_desc_template;
>   u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */
>   int (*emit_request)(struct drm_i915_gem_request *request);
>   int (*emit_flush)(struct drm_i915_gem_request *request,
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 04/13] drm/i915: Fail engine initialization if LRCA is incorrectly aligned

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:43AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> LRCA can change only when it goes from unpinned to pinned so it
> makes sense to check its alignment at that point rather than at
> every batch buffer submission.
> 
> Furthermore, if we check it at pin time we can actually
> gracefully fail the engine initialization rather than just
> spamming the logs at runtime with WARNs.
> 
> v2: Return ENODEV for bad alignment. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 84977a6e6f3f..ff146a15d395 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -302,8 +302,6 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>   uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
>   LRC_PPHWSP_PN * PAGE_SIZE;
>  
> - WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
> -
>   desc |= lrca;
>   desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
>  
> @@ -1030,6 +1028,7 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>  {
>   struct drm_device *dev = ring->dev;
>   struct drm_i915_private *dev_priv = dev->dev_private;
> + u64 lrca;
>   int ret = 0;
>  
>   WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> @@ -1038,6 +1037,12 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>   if (ret)
>   return ret;
>  
> + lrca = i915_gem_obj_ggtt_offset(ctx_obj) + LRC_PPHWSP_PN * PAGE_SIZE;
> + if (WARN_ON(lrca & 0xFFFFFFFF00000FFFULL)) {

Essentially this checks that it's page-aligned (which is a fundamental
assumption of how we place objects we depend upon everywhere) and that it
fits within the 4G hw limit of the global gtt (again we assume our code is
correct that way). tbh I'd just drop entirely, it's a useless check.
-Daniel
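
[Editor's note: spelling out the two constraints folded into that mask,
assuming 4KiB pages and the 32b global GTT Daniel mentions:]

    /*
     *   bits  0..11  offset within a 4KiB page    -> must be zero (alignment)
     *   bits 12..31  page address within the GGTT
     *   bits 32..63  beyond the 4GiB hw limit     -> must be zero (fits GGTT)
     *
     * so WARN_ON(lrca & 0xFFFFFFFF00000FFFULL) fires iff the offset is either
     * misaligned or outside the global GTT.
     */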

> + ret = -ENODEV;
> + goto unpin_ctx_obj;
> + }
> +
>   ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
>   if (ret)
>   goto unpin_ctx_obj;
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 05/13] drm/i915: Cache LRCA in the context

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:44AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> We are not allowed to call i915_gem_obj_ggtt_offset from irq
> context without the big kernel lock held.
> 
> LRCA lifetime is well defined so cache it so it can be looked up
> cheaply from the interrupt context and at command submission
> time.
> 
> v2: Added irq context reasoning to the commit message. (Daniel Vetter)
> 
> Signed-off-by: Tvrtko Ursulin 

An i915_obj_for_each_vma macro with a
WARN_ON(!mutex_is_locked(dev->struct_mutex)) would be awesome to validate
this. Especially since this is by far not the only time I've seen this
bug. Needs to be a follow-up though to avoid stalling this fix.

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 15 ++
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c | 40 -
>  drivers/gpu/drm/i915/intel_lrc.h |  3 ++-
>  4 files changed, 26 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 3b05bd189eab..714a45cf8a51 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1976,12 +1976,13 @@ static int i915_context_status(struct seq_file *m, void *unused)
>  }
>  
>  static void i915_dump_lrc_obj(struct seq_file *m,
> -   struct intel_engine_cs *ring,
> -   struct drm_i915_gem_object *ctx_obj)
> +   struct intel_context *ctx,
> +   struct intel_engine_cs *ring)
>  {
>   struct page *page;
>   uint32_t *reg_state;
>   int j;
> + struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
>   unsigned long ggtt_offset = 0;
>  
>   if (ctx_obj == NULL) {
> @@ -1991,7 +1992,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   }
>  
>   seq_printf(m, "CONTEXT: %s %u\n", ring->name,
> -intel_execlists_ctx_id(ctx_obj));
> +intel_execlists_ctx_id(ctx, ring));
>  
>   if (!i915_gem_obj_ggtt_bound(ctx_obj))
>   seq_puts(m, "\tNot bound in GGTT\n");
> @@ -2040,8 +2041,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
>   list_for_each_entry(ctx, &dev_priv->context_list, link) {
>   for_each_ring(ring, dev_priv, i) {
>   if (ring->default_context != ctx)
> - i915_dump_lrc_obj(m, ring,
> -   ctx->engine[i].state);
> + i915_dump_lrc_obj(m, ctx, ring);
>   }
>   }
>  
> @@ -2115,11 +2115,8 @@ static int i915_execlists(struct seq_file *m, void *data)
>  
>   seq_printf(m, "\t%d requests in queue\n", count);
>   if (head_req) {
> - struct drm_i915_gem_object *ctx_obj;
> -
> - ctx_obj = head_req->ctx->engine[ring_id].state;
>   seq_printf(m, "\tHead request id: %u\n",
> -intel_execlists_ctx_id(ctx_obj));
> +intel_execlists_ctx_id(head_req->ctx, ring));
>   seq_printf(m, "\tHead request tail: %u\n",
>  head_req->tail);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8cf655c6fc03..b77a5d84eac2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -881,6 +881,7 @@ struct intel_context {
>   struct drm_i915_gem_object *state;
>   struct intel_ringbuffer *ringbuf;
>   int pin_count;
> + u32 lrca;

lrc_offset imo. Consistent with our other usage in the driver, and
actually readable. Please apply liberally everywhere else (I know that
bsepc calls it lrca, but we don't need to follow bad naming styles
blindly).

With that Reviewed-by: Daniel Vetter 

>   } engine[I915_NUM_RINGS];
>  
>   struct list_head link;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ff146a15d395..ffe004de22b0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -265,7 +265,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
>  
>  /**
>   * intel_execlists_ctx_id() - get the Execlists Context ID
> - * @ctx_obj: Logical Ring Context backing object.
> + * @ctx: Context to get the ID for
> + * @ring: Engine to get the ID for
>   *
>   * Do not confuse with ctx->id! Unfortunately we have a name overload
>   * here: the old context ID we pass to userspace as a handler so that
> @@ -275,14 +276,12 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
>   *
>   * Return: 20-bits globally unique context ID.
>   */
> -u32 intel_execlists_ctx_id(struct drm_i915_gem_object *

Re: [Intel-gfx] [PATCH 06/13] drm/i915: Only grab timestamps when needed

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:45AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> No need to call ktime_get_raw_ns twice per unlimited wait and can
> also eliminate a local variable.
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index de98dc41fb9f..c4f69579eb7a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1246,7 +1246,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
>   DEFINE_WAIT(wait);
>   unsigned long timeout_expire;
> - s64 before, now;
> + s64 before = 0;

Is gcc really this dense? Should be easy for it to spot that both branches
depend upon the same condition. Please remove that assignment. With that
changed:

Reviewed-by: Daniel Vetter 
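
[Editor's note: the pattern in question, reduced to its essentials -- the
store and the load of `before` are guarded by the same condition, which
-Wmaybe-uninitialized cannot always prove; whether that justifies the
defensive `= 0` is the judgment call being made above:]

    static s64 elapsed(bool timed)
    {
            s64 before;     /* deliberately not initialised */

            if (timed)
                    before = ktime_get_raw_ns();    /* store guarded by 'timed' */

            /* ... wait ... */

            if (timed)
                    return ktime_get_raw_ns() - before; /* load guarded identically */
            return 0;
    }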

>   int ret;
>  
>   WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
> @@ -1266,14 +1266,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   return -ETIME;
>  
>   timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
> +
> + /*
> +  * Record current time in case interrupted by signal, or wedged.
> +  */
> + before = ktime_get_raw_ns();
>   }
>  
>   if (INTEL_INFO(dev_priv)->gen >= 6)
>   gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
>  
> - /* Record current time in case interrupted by signal, or wedged */
>   trace_i915_gem_request_wait_begin(req);
> - before = ktime_get_raw_ns();
>  
>   /* Optimistic spin for the next jiffie before touching IRQs */
>   ret = __i915_spin_request(req, state);
> @@ -1331,11 +1334,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>   finish_wait(&ring->irq_queue, &wait);
>  
>  out:
> - now = ktime_get_raw_ns();
>   trace_i915_gem_request_wait_end(req);
>  
>   if (timeout) {
> - s64 tres = *timeout - (now - before);
> + s64 tres = *timeout - (ktime_get_raw_ns() - before);
>  
>   *timeout = tres < 0 ? 0 : tres;
>  
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 01:29:14PM +, Tvrtko Ursulin wrote:
> 
> On 08/01/16 11:29, Tvrtko Ursulin wrote:
> >From: Tvrtko Ursulin 
> >
> >Purpose is to catch places which iterate the object VMA list
> >without holding the big lock.
> >
> >Implemented by open coding list_for_each_entry to make the
> >macro compatible with existing call sites.
> >
> >Signed-off-by: Tvrtko Ursulin 
> >Cc: Daniel Vetter 
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c  |  8 
> >  drivers/gpu/drm/i915/i915_drv.h  |  6 ++
> >  drivers/gpu/drm/i915/i915_gem.c  | 24 
> >  drivers/gpu/drm/i915/i915_gem_gtt.c  |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
> >  drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
> >  6 files changed, 26 insertions(+), 20 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >index 714a45cf8a51..d7c2a3201161 100644
> >--- a/drivers/gpu/drm/i915/i915_debugfs.c
> >+++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >@@ -117,7 +117,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
> > u64 size = 0;
> > struct i915_vma *vma;
> >
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > if (i915_is_ggtt(vma->vm) &&
> > drm_mm_node_allocated(&vma->node))
> > size += vma->node.size;
> >@@ -155,7 +155,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> > if (obj->base.name)
> > seq_printf(m, " (name: %d)", obj->base.name);
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > if (vma->pin_count > 0)
> > pin_count++;
> > }
> >@@ -164,7 +164,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> > seq_printf(m, " (display)");
> > if (obj->fence_reg != I915_FENCE_REG_NONE)
> > seq_printf(m, " (fence: %d)", obj->fence_reg);
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
> >i915_is_ggtt(vma->vm) ? "g" : "pp",
> >vma->node.start, vma->node.size);
> >@@ -342,7 +342,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> > stats->shared += obj->base.size;
> >
> > if (USES_FULL_PPGTT(obj->base.dev)) {
> >-list_for_each_entry(vma, &obj->vma_list, vma_link) {
> >+i915_gem_obj_for_each_vma(vma, obj) {
> > struct i915_hw_ppgtt *ppgtt;
> >
> > if (!drm_mm_node_allocated(&vma->node))
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index b77a5d84eac2..0406a020dfcc 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2852,6 +2852,12 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
> >  void i915_gem_free_object(struct drm_gem_object *obj);
> >  void i915_gem_vma_destroy(struct i915_vma *vma);
> >
> >+#define i915_gem_obj_for_each_vma(vma, obj) \
> >+for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \
> >+ vma = list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link);\
> >+ &vma->vma_link != (&(obj)->vma_list); \
> >+ vma = list_next_entry(vma, vma_link))
> >+
> 
> 
> Unfortunately error capture is not happy with this approach. Can't even see
> that error capture attempts to grab the mutex anywhere.
> 
> So what? Drop the idea or add a "doing error capture" flag somewhere?

Fix the bugs. No surprise at all that we've screwed this up all over the
place ;-) Afaics modeset code isn't much better either ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 07/13] drm/i915: Introduce dedicated object VMA iterator

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:44:04AM +, Chris Wilson wrote:
> On Fri, Jan 08, 2016 at 11:29:46AM +, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin 
> > 
> > Purpose is to catch places which iterate the object VMA list
> > without holding the big lock.
> > 
> > Implemented by open coding list_for_each_entry to make the
> > macro compatible with existing call sites.
> > 
> > Signed-off-by: Tvrtko Ursulin 
> > Cc: Daniel Vetter 
> > +#define i915_gem_obj_for_each_vma(vma, obj) \
> > +   for (WARN_ON_ONCE(!mutex_is_locked(&(obj)->base.dev->struct_mutex)), \
> 
> Let's not go around adding WARN(!mutex_locked) to GEM code when
> lockdep_assert_held doesn't add overhead outside of testing.

Hm yeah I still prefer WARN_ON for modeset code (where it doesn't matter)
because of increased test coverage. But for gem it indeed makes more sense
to only do this for lockdep-enabled builds. CI runs with lockdep, so we're
good.
-Daniel
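
[Editor's note: a sketch of the iterator with lockdep_assert_held() in place
of WARN_ON_ONCE(). Since lockdep_assert_held() expands to a statement rather
than an expression, it needs a statement expression in the for-initialiser;
it compiles away entirely on non-lockdep builds, matching the "CI runs with
lockdep" reasoning above:]

    #define i915_gem_obj_for_each_vma(vma, obj) \
            for (vma = ({ lockdep_assert_held(&(obj)->base.dev->struct_mutex); \
                          list_first_entry(&(obj)->vma_list, typeof(*vma), vma_link); }); \
                 &vma->vma_link != (&(obj)->vma_list); \
                 vma = list_next_entry(vma, vma_link))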
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 11/13] drm/i915: Cache ringbuffer GTT address

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:50AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Purpose is to avoid calling i915_gem_obj_ggtt_offset from the
> interrupt context without the big lock held.
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 3 +--
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
>  3 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b3795815d8e..70c511ef6b12 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -345,7 +345,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
>   struct intel_engine_cs *ring = rq->ring;
>   struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
>   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
> - struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
>   struct page *page;
>   uint32_t *reg_state;
>  
> @@ -355,7 +354,7 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
>   reg_state = kmap_atomic(page);
>  
>   reg_state[CTX_RING_TAIL+1] = rq->tail;
> - reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
> + reg_state[CTX_RING_BUFFER_START+1] = rq->ringbuf->gtt_start;
>  
>   if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
>   /* True 32b PPGTT with dynamic page allocation: update PDP
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 339701d7a9a5..9094ce254125 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1988,6 +1988,7 @@ void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
>   else
>   iounmap(ringbuf->virtual_start);
>   ringbuf->virtual_start = NULL;
> + ringbuf->gtt_start = 0;
>   i915_gem_object_ggtt_unpin(ringbuf->obj);
>  }
>  
> @@ -2054,6 +2055,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
>   }
>   }
>  
> + ringbuf->gtt_start = i915_gem_obj_ggtt_offset(obj);
> +
>   return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0b91a4b77359..25d3716228ae 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -98,6 +98,7 @@ struct intel_ring_hangcheck {
>  struct intel_ringbuffer {
>   struct drm_i915_gem_object *obj;
>   void __iomem *virtual_start;
> + u64 gtt_start;

gtt_offset, because consistency. Or vma, as Chris suggested.
-Daniel

>  
>   struct intel_engine_cs *ring;
>   struct list_head link;
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH 09/13] drm/i915: Remove two impossible asserts

2016-01-11 Thread Daniel Vetter
On Fri, Jan 08, 2016 at 11:29:48AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Engine initialization would have failed if those two weren't
> pinned and calling i915_gem_obj_is_pinned is illegal from irq
> context without the big lock held.
> 
> Signed-off-by: Tvrtko Ursulin 

Reviewed-by: Daniel Vetter 

> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ffe004de22b0..5b3795815d8e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -350,8 +350,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
>   uint32_t *reg_state;
>  
>   BUG_ON(!ctx_obj);
> - WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
> - WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
>  
>   page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
>   reg_state = kmap_atomic(page);
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH] drm/i915: Support to enable TRTT on GEN9

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 01:09:50PM +0530, Goel, Akash wrote:
> 
> 
> On 1/10/2016 11:09 PM, Chris Wilson wrote:
> >On Sat, Jan 09, 2016 at 05:00:21PM +0530, akash.g...@intel.com wrote:
> >>From: Akash Goel 
> >>
> >>Gen9 has an additional address translation hardware support in form of
> >>Tiled Resource Translation Table (TR-TT) which provides an extra level
> >>of abstraction over PPGTT.
> >>This is useful for mapping Sparse/Tiled texture resources.
> >>Sparse resources are created as virtual-only allocations. Regions of the
> >>resource that the application intends to use are bound to the physical memory
> >>on the fly and can be re-bound to different memory allocations over the
> >>lifetime of the resource.
> >>
> >>TR-TT is tightly coupled with PPGTT; a new instance of TR-TT will be required
> >>for a new PPGTT instance, but TR-TT may not be enabled for every context.
> >>1/16th of the 48bit PPGTT space is earmarked for the translation by TR-TT;
> >>which chunk to use is conveyed to HW through a register.
> >>Any GFX address that lies in that reserved 44-bit range will be translated
> >>through TR-TT first and then through PPGTT to get the actual physical address,
> >>so the output of translation from TR-TT will be a PPGTT offset.
> >>
> >>TR-TT is constructed as a 3-level tile table. Each tile is 64KB in size, which
> >>leaves behind 44-16=28 address bits. The 28 bits are partitioned as 9+9+10, and
> >>each level is contained within a 4KB page, hence L3 and L2 are composed of
> >>512 64b entries and L1 is composed of 1024 32b entries.
> >>
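[Editor's note: the arithmetic above, spelled out for anyone checking along:

    1/16th of 48b space:  2^48 / 2^4        -> a 44-bit range
    64KB tile:            log2(64K) = 16    -> low 16 bits resolved in-tile
    remaining:            44 - 16 = 28 bits -> split 9 (L3) + 9 (L2) + 10 (L1)
    L3/L2 levels:         4096B / 8B per entry = 512 entries  -> 9 bits each
    L1 level:             4096B / 4B per entry = 1024 entries -> 10 bits]
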
> >>There is a provision to keep TR-TT tables in virtual space, where the pages of
> >>TR-TT tables will be mapped to PPGTT.
> >>Currently this is the supported mode; in this mode UMD will have full control
> >>over TR-TT management, with bare minimum support from KMD.
> >>So the entries of L3 table will contain the PPGTT offset of L2 Table pages,
> >>similarly entries of L2 table will contain the PPGTT offset of L1 Table pages.
> >>The entries of L1 table will contain the PPGTT offset of BOs actually backing
> >>the Sparse resources.
> >
> >>The assumption here is that only UMD will do the complete PPGTT address space
> >>management and use the Soft Pin API for all the buffer objects associated with
> >>a given Context.
> >
> >That is a poor assumption, and not one required for this to work.
> >
> This is not a strict requirement.
> But I thought that conflicts will be minimized if UMD itself can do
> the full address space management.
> At least UMD has to ensure that PPGTT offset of L3 table remains
> same throughout.

Yes, userspace must control that object, and that would require softpin
to preserve it across execbuffer calls. The kernel does not require that
all addresses be handled in userspace afterwards, that's the language I
wish to avoid. (Hence I don't like using "assumption" as that just
invites userspace to break the kernel.)
 
> >>So UMD will also have to allocate the L3/L2/L1 table pages
> >>as a regular GEM BO only & assign them a PPGTT address through the Soft Pin API.
> >>UMD would have to emit the MI_STORE_DATA_IMM commands in the batch buffer to
> >>program the relevant entries of L3/L2/L1 tables.
> >
> >This only applies to the TR-TT L1-L3 cache, right?
> >
> Yes applies only to the TR-TT L1-L3 tables.
> The backing pages of L3/L2/L1 tables shall be allocated as a BO,
> which should be assigned a PPGTT address.
> The table entries could be written directly also by UMD by mmapping
> the table BOs, but adding MI_STORE_DATA_IMM commands in the batch
> buffer itself would help to achieve serialization (implicitly).

Can you tighten up the phrasing here? My first read was that you indeed intend
for all PTE writes to be in userspace, which is scary.

"UMD will then allocate the L3/L32/L1 page tables for TR-TT as a regular
bo, and will use softpin to assign it to the l3_table_address when used.
UMD will also maintain the entries in the TR-TT page tables using
regular batch commands (MI_STORE_DATA_IMM), or via mmapping of the page
table bo."

> >>autonomously and KMD will be oblivious of it.
> >>The BOs must not be assigned an address from TR-TT segment, they will be 
> >>mapped
> >
> >s/The BOs/Any object/
> >
> Ok will use 'Any object'
> >>to PPGTT in a regular way by KMD
> >
> >s/using the Soft Pin offset provided by UMD// as this is irrelevant.
> >
> You mean to say that it is needless or inappropriate to state that
KMD will use the Soft PIN offset provided by UMD; it doesn't matter
whether the Soft PIN offset is used or KMD itself assigns an
address.

I just want to avoid implying that userspace must use softpin on every
single bo for this to work. (Mainly because I don't really want
userspace to have to do full address space management, as we will always
have to do the double check inside the kernel. Unless there is a real
need (e.g. svm), I'd rather improve the kernel allocator/verification, rather
than 

Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 09:00:13AM +0100, Daniel Vetter wrote:
> On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote:
> > gem_concurrent_blit tries to ensure that it doesn't try and run a test
> > that would grind the system to a halt, i.e. unexpectedly cause swap
> > thrashing. It currently calls intel_require_memory(), but outside of
> > the subtest (as the tests use fork, it cannot do requirement testing
> > within the test children) - but intel_require_memory() calls
> > igt_require() and triggers an abort. Wrapping that initial require
> > within an igt_fixture() stops the abort(), but also prevents any further
> > testing.
> > 
> > This patch restructures the requirement checking to ordinary conditions,
> > which, though allowing the test to run, also prevent listing of subtests
> > on machines which cannot handle them.
> 
> 
> > ---
> >  lib/igt_aux.h  |  2 ++
> >  lib/intel_os.c | 53 +++-
> >  tests/gem_concurrent_all.c | 67 +-
> >  3 files changed, 85 insertions(+), 37 deletions(-)
> > 
> > diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> > index 6e11ee6..5a88c2a 100644
> > --- a/lib/igt_aux.h
> > +++ b/lib/igt_aux.h
> > @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void);
> >  
> >  #define CHECK_RAM 0x1
> >  #define CHECK_SWAP 0x2
> > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode,
> > +uint64_t *out_required, uint64_t *out_total);
> >  void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
> >  int intel_num_objects_for_memory(uint32_t size, unsigned mode);
> >  
> > diff --git a/lib/intel_os.c b/lib/intel_os.c
> > index dba9e17..90f9bb3 100644
> > --- a/lib/intel_os.c
> > +++ b/lib/intel_os.c
> > @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void)
> > return retval / (1024*1024);
> >  }
> >  
> 
> Please add the usual gtkdoc boilerplate here with a mention of
> intel_check_memory. Ack with that.

You were meant to object about how this breaks test runners and suggest
how we can do this without that breakage.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] ✗ warning: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
Subgroup basic-render:
dmesg-warn -> PASS   (bdw-ultra)
dmesg-warn -> PASS   (skl-i7k-2) UNSTABLE
Test kms_flip:
Subgroup basic-flip-vs-dpms:
dmesg-warn -> PASS   (ilk-hp8440p)
Subgroup basic-flip-vs-modeset:
pass   -> DMESG-WARN (ilk-hp8440p)
Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-b:
dmesg-warn -> PASS   (byt-nuc)

bdw-ultra    total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6
bsw-nuc-2    total:141  pass:114  dwarn:3   dfail:0   fail:0   skip:24
byt-nuc      total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7
hsw-gt2      total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4
hsw-xps12    total:138  pass:133  dwarn:1   dfail:0   fail:0   skip:4
ilk-hp8440p  total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37
skl-i5k-2    total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
skl-i7k-2    total:141  pass:132  dwarn:1   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14
snb-x220t    total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13

Results at /archive/results/CI_IGT_test/Patchwork_1110/



[Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

HEAD is now at ff88655 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest
Applying: drm/i915: Use passed plane state for sprite planes, v4.
Using index info to reconstruct a base tree...
M   drivers/gpu/drm/i915/intel_drv.h
M   drivers/gpu/drm/i915/intel_sprite.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/intel_sprite.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/intel_sprite.c
Patch failed at 0001 drm/i915: Use passed plane state for sprite planes, v4.



Re: [Intel-gfx] [PATCH igt] core/sighelper: Interrupt everyone in the process group

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 08:57:33AM +0100, Daniel Vetter wrote:
> On Fri, Jan 08, 2016 at 08:44:29AM +, Chris Wilson wrote:
> > Some stress tests create both the signal helper and a lot of competing
> > processes. In these tests, the parent is just waiting upon the children,
> > and the intention is not to keep waking up the waiting parent, but to
> > keep interrupting the children (as we hope to trigger races in our
> > kernel code). kill(-pid) sends the signal to all members of the process
> > group, not just the target pid.
> 
> I don't really have any clue about unix pgroups, but the -pid disappeared
> compared to the previous version.

-getppid().

I felt it was clearer to pass along the "negative pid = process group"
after setting up the process group.
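
[Editor's note: the kill(-pid) semantics being relied on, as a minimal
standalone sketch (not the actual igt code):]

    #include <signal.h>
    #include <unistd.h>

    int main(void)
    {
            setpgrp();      /* make ourselves a process group leader */

            for (int i = 0; i < 4; i++) {
                    if (fork() == 0) {      /* children inherit the group */
                            for (;;)
                                    pause();
                    }
            }

            /* A negative pid addresses the whole process group: one call
             * reaches the parent and every forked child alike. */
            kill(-getpgrp(), SIGCONT);
            return 0;
    }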

> > We also switch from using SIGUSR1 to SIGCONT to paper over a race
> > condition when forking children that saw the default signal action being
> > run (and thus killing the child).
> 
> I thought I fixed that race by first installing the new signal handler,
> then forking. Ok, rechecked and it's the SYS_getpid stuff, so another
> race. Still I thought signal handlers would survive a fork?

So did irc. They didn't appear to as the children would sporadically
die with SIGUSR1.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH 2/2] drm/i915/gen9: Calculate edram size

2016-01-11 Thread Chris Wilson
On Mon, Jan 11, 2016 at 08:50:43AM +0100, Daniel Vetter wrote:
> On Fri, Jan 08, 2016 at 06:58:45PM +0200, Mika Kuoppala wrote:
> > With gen9+ the edram capabilities are defined so
> > that we can calculate the edram (ellc) size accordingly.
> > 
> > Note that there are undefined combinations for some subset of
> > edram capability bits. Return the closest size for undefined indexes.
> > Even if we get it wrong with beginning of future gen enabling, the size
> > information is currently only used for boot message and in debugfs entry.
> > 
> > Signed-off-by: Mika Kuoppala 
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h | 14 ++
> >  drivers/gpu/drm/i915/intel_uncore.c |  9 +
> >  2 files changed, 19 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index f88b75ec5047..52283c44a0c1 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -6820,6 +6820,20 @@ enum skl_disp_power_wells {
> >  #define IDIHASHMSK(x)   (((x) & 0x3f) << 16)
> >  #define HSW_EDRAM_CAP   _MMIO(0x120010)
> >  #define EDRAM_ENABLED   0x1
> > +#define EDRAM_NUM_BANKS(cap)    (((cap) >> 1) & 0xf)
> > +#define __EDRAM_WAYS(cap)   (((cap) >> 5) & 0x7)
> > +#define EDRAM_NUM_WAYS(cap) ({ \
> > +   int __ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; \
> > +   __ways[__EDRAM_WAYS(cap)]; \
> > +})
> > +#define __EDRAM_SETS(cap)   (((cap) >> 8) & 0x3)
> > +#define EDRAM_NUM_SETS(cap) ({ \
> > +   int __sets[4] = { 1, 1, 2, 2 }; \
> > +   __sets[__EDRAM_SETS(cap)]; \
> > +})
> > +#define EDRAM_SIZE(cap) (EDRAM_NUM_BANKS(cap) * \
> > +    EDRAM_NUM_WAYS(cap) * \
> > +    EDRAM_NUM_SETS(cap))
> 
> Please just make a function out of this, no reason to make it into a
> hard-to-read macro. E.g. gen9_edram_size or whatever is the first gen this
> starts to be valid for.

May I humbly suggest intel_uncore_edram_size_mb() ? ;)
-Chris
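
[Editor's note: a sketch of the suggested helper form -- name per Chris,
lookup tables lifted from the macros above; where it lives and what it is
finally called is up to the author:]

    static unsigned int intel_uncore_edram_size_mb(u32 cap)
    {
            static const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 };
            static const unsigned int sets[4] = { 1, 1, 2, 2 };

            unsigned int num_banks = (cap >> 1) & 0xf;
            unsigned int num_ways = ways[(cap >> 5) & 0x7];
            unsigned int num_sets = sets[(cap >> 8) & 0x3];

            return num_banks * num_ways * num_sets;
    }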

-- 
Chris Wilson, Intel Open Source Technology Centre


Re: [Intel-gfx] [PATCH igt] core/sighelper: Interrupt everyone in the process group

2016-01-11 Thread Daniel Vetter
On Mon, Jan 11, 2016 at 08:54:59AM +, Chris Wilson wrote:
> On Mon, Jan 11, 2016 at 08:57:33AM +0100, Daniel Vetter wrote:
> > On Fri, Jan 08, 2016 at 08:44:29AM +, Chris Wilson wrote:
> > > Some stress tests create both the signal helper and a lot of competing
> > > processes. In these tests, the parent is just waiting upon the children,
> > > and the intention is not to keep waking up the waiting parent, but to
> > > keep interrupting the children (as we hope to trigger races in our
> > > kernel code). kill(-pid) sends the signal to all members of the process
> > > group, not just the target pid.
> > 
> > I don't really have any clue about unix pgroups, but the -pid disappeared
> > compared to the previous version.
> 
> -getppid().
> 
> I felt it was clearer to pass along the "negative pid = process group"
> after setting up the process group.

Oh, I was blind ... Yeah looks better, but please add a bigger comment
around that code explaining why we need a group and why we use SIG_CONT.
With that acked-by: me.

Cheers, Daniel

> > > We also switch from using SIGUSR1 to SIGCONT to paper over a race
> > > condition when forking children that saw the default signal action being
> > > run (and thus killing the child).
> > 
> > I thought I fixed that race by first installing the new signal handler,
> > then forking. Ok, rechecked and it's the SYS_getpid stuff, so another
> > race. Still I thought signal handlers would survive a fork?
> 
> So did irc. They didn't appear to as the children would sporadically
> die with SIGUSR1.

Could be that libc is doing something funny, iirc they have piles of fork
helpers to make fork more reliable (breaking locks and stuff like that),
but then in turn break the abstraction.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH igt] gem_concurrent_blit: Don't call igt_require() outside of a subtest/fixture

2016-01-11 Thread Daniel Vetter
On Mon, Jan 11, 2016 at 08:52:24AM +, Chris Wilson wrote:
> On Mon, Jan 11, 2016 at 09:00:13AM +0100, Daniel Vetter wrote:
> > On Fri, Jan 08, 2016 at 09:10:38AM +, Chris Wilson wrote:
> > > gem_concurrent_blit tries to ensure that it doesn't try and run a test
> > > that would grind the system to a halt, i.e. unexpectedly cause swap
> > > thrashing. It currently calls intel_require_memory(), but outside of
> > > the subtest (as the tests use fork, it cannot do requirement testing
> > > within the test children) - but intel_require_memory() calls
> > > igt_require() and triggers an abort. Wrapping that initial require
> > > within an igt_fixture() stops the abort(), but also prevents any further
> > > testing.
> > > 
> > > This patch restructures the requirement checking to ordinary conditions,
> > > which, though allowing the test to run, also prevent listing of subtests
> > > on machines which cannot handle them.
> > 
> > 
> > > ---
> > >  lib/igt_aux.h  |  2 ++
> > >  lib/intel_os.c | 53 +++-
> > >  tests/gem_concurrent_all.c | 67 +-
> > >  3 files changed, 85 insertions(+), 37 deletions(-)
> > > 
> > > diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> > > index 6e11ee6..5a88c2a 100644
> > > --- a/lib/igt_aux.h
> > > +++ b/lib/igt_aux.h
> > > @@ -88,6 +88,8 @@ uint64_t intel_get_total_swap_mb(void);
> > >  
> > >  #define CHECK_RAM 0x1
> > >  #define CHECK_SWAP 0x2
> > > +int __intel_check_memory(uint32_t count, uint64_t size, unsigned mode,
> > > +  uint64_t *out_required, uint64_t *out_total);
> > >  void intel_require_memory(uint32_t count, uint64_t size, unsigned mode);
> > >  int intel_num_objects_for_memory(uint32_t size, unsigned mode);
> > >  
> > > diff --git a/lib/intel_os.c b/lib/intel_os.c
> > > index dba9e17..90f9bb3 100644
> > > --- a/lib/intel_os.c
> > > +++ b/lib/intel_os.c
> > > @@ -192,6 +192,38 @@ intel_get_total_swap_mb(void)
> > >   return retval / (1024*1024);
> > >  }
> > >  
> > 
> > Please add the usual gtkdoc boilerplate here with a mention of
> > intel_check_memory. Ack with that.
> 
> You were meant to object about how this breaks test runners and suggest
> how we can do this without that breakage.

Oh I didn't realize that you're moving things out of
igt_fixture/igt_subtest blocks again ... Ack retracted, I'll check what
happened to my coffee meanwhile ;-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


[Intel-gfx] ✗ failure: Fi.CI.BAT

2016-01-11 Thread Patchwork
== Summary ==

Built on ff88655b3a5467bbc3be8c67d3e05ebf182557d3 drm-intel-nightly: 2016y-01m-11d-07h-30m-16s UTC integration manifest

Test gem_storedw_loop:
Subgroup basic-render:
pass   -> DMESG-WARN (skl-i5k-2) UNSTABLE
dmesg-warn -> PASS   (bdw-ultra)
Test kms_flip:
Subgroup basic-flip-vs-dpms:
dmesg-warn -> PASS   (ilk-hp8440p)
Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-b:
pass   -> DMESG-WARN (ilk-hp8440p)
dmesg-warn -> PASS   (byt-nuc)

bdw-ultra    total:138  pass:132  dwarn:0   dfail:0   fail:0   skip:6
bsw-nuc-2    total:141  pass:114  dwarn:3   dfail:0   fail:0   skip:24
byt-nuc      total:141  pass:119  dwarn:7   dfail:0   fail:0   skip:15
hsw-brixbox  total:141  pass:134  dwarn:0   dfail:0   fail:0   skip:7
hsw-gt2      total:141  pass:137  dwarn:0   dfail:0   fail:0   skip:4
hsw-xps12    total:138  pass:133  dwarn:1   dfail:0   fail:0   skip:4
ilk-hp8440p  total:141  pass:100  dwarn:4   dfail:0   fail:0   skip:37
skl-i5k-2    total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
skl-i7k-2    total:141  pass:131  dwarn:2   dfail:0   fail:0   skip:8
snb-dellxps  total:141  pass:122  dwarn:5   dfail:0   fail:0   skip:14
snb-x220t    total:141  pass:122  dwarn:5   dfail:0   fail:1   skip:13

HANGED ivb-t430s in igt@kms_pipe_crc_basic@nonblocking-crc-pipe-b

Results at /archive/results/CI_IGT_test/Patchwork_1112/



[Intel-gfx] [PATCH 002/190] drm/i915: Move the mb() following release-mmap into release-mmap

2016-01-11 Thread Chris Wilson
As paranoia, we want to ensure that the CPU's PTEs have been revoked for
the object before we return from i915_gem_release_mmap(). This allows us
to rely on there being no outstanding memory accesses and guarantees
serialisation of the code against concurrent access just by calling
i915_gem_release_mmap().

v2: Reduce the mb() into a wmb() following the revoke.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: "Goel, Akash" 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c60e04fc09c..3ab529669448 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1962,11 +1962,21 @@ out:
 void
 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 {
+   /* Serialisation between user GTT access and our code depends upon
+* revoking the CPU's PTE whilst the mutex is held. The next user
+* pagefault then has to wait until we release the mutex.
+*/
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
if (!obj->fault_mappable)
return;
 
drm_vma_node_unmap(&obj->base.vma_node,
   obj->base.dev->anon_inode->i_mapping);
+
+   /* Ensure that the CPU's PTE are revoked before we return */
+   wmb();
+
obj->fault_mappable = false;
 }
 
@@ -3269,9 +3279,6 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
return;
 
-   /* Wait for any direct GTT access to complete */
-   mb();
-
old_read_domains = obj->base.read_domains;
old_write_domain = obj->base.write_domain;
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 004/190] drm/i915: Fix some invalid requests cancellations

2016-01-11 Thread Chris Wilson
As we add the VMA to the request early, it may be cancelled during
execbuf reservation. This will leave the context object pointing to a
dangling request; i915_wait_request() simply skips the wait and so we
may unbind the object whilst it is still active.

However, if at any point we make a change to the hardware (and equally
importantly our bookkeeping in the driver), we cannot cancel the request
as what has already been written must be submitted. Submitting a partial
request is far easier than trying to unwind the incomplete change.

Unfortunately this patch undoes the excess breadcrumb usage that olr
prevented, e.g. if we interrupt batchbuffer submission then we submit
the requests along with the memory writes and interrupt (even though we
do no real work). Disassociating requests from breadcrumbs (and
semaphores) is a topic for a past/future series, but now much more
important.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Cc: sta...@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_drv.h            |  1 -
 drivers/gpu/drm/i915/i915_gem.c            |  7 ++-
 drivers/gpu/drm/i915/i915_gem_context.c    | 21 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +---
 drivers/gpu/drm/i915/intel_display.c       |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c           |  1 -
 6 files changed, 17 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 747d2d84a18c..ec20814adb0c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2813,7 +2813,6 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_priv);
 void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
struct drm_i915_gem_request *req);
-void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params);
 int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   struct list_head *vmas);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3ab529669448..fd24877eb0a0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3384,12 +3384,9 @@ int i915_gpu_idle(struct drm_device *dev)
return ret;
 
ret = i915_switch_context(req);
-   if (ret) {
-   i915_gem_request_cancel(req);
-   return ret;
-   }
-
i915_add_request_no_flush(req);
+   if (ret)
+   return ret;
}
 
ret = intel_ring_idle(ring);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index c25083c78ba7..e5e9a8918f19 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -661,7 +661,6 @@ static int do_switch(struct drm_i915_gem_request *req)
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct intel_context *from = ring->last_context;
u32 hw_flags = 0;
-   bool uninitialized = false;
int ret, i;
 
if (from != NULL && ring == &dev_priv->ring[RCS]) {
@@ -768,6 +767,15 @@ static int do_switch(struct drm_i915_gem_request *req)
to->remap_slice &= ~(1<<i);
	}

+   if (!to->legacy_hw_ctx.initialized) {
+   if (ring->init_context) {
+   ret = ring->init_context(req);
+   if (ret)
+   goto unpin_out;
+   }
+   to->legacy_hw_ctx.initialized = true;
+   }
+
/* The backing object for the context is done after switching to the
 * *next* context. Therefore we cannot retire the previous context until
 * the next context has already started running. In fact, the below code
@@ -791,21 +799,10 @@ static int do_switch(struct drm_i915_gem_request *req)
i915_gem_context_unreference(from);
}
 
-   uninitialized = !to->legacy_hw_ctx.initialized;
-   to->legacy_hw_ctx.initialized = true;
-
 done:
i915_gem_context_reference(to);
ring->last_context = to;
 
-   if (uninitialized) {
-   if (ring->init_context) {
-   ret = ring->init_context(req);
-   if (ret)
-   DRM_ERROR("ring init context: %d\n", ret);
-   }
-   }
-
return 0;
 
 unpin_out:
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dccb517361b3..b8186bd061c1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1136,7 +1136,7 @@ i915_gem_ex

[Intel-gfx] [PATCH 001/190] drm: Release driver references to handle before making it available again

2016-01-11 Thread Chris Wilson
When userspace closes a handle, we remove it from the file->object_idr
and then tell the driver to drop its references to that file/handle.
However, as the file/handle is already available again for reuse, it may
be reallocated back to userspace and active on a new object before the
driver has had a chance to drop the old file/handle references.

Whilst calling back into the driver, we have to drop the
file->table_lock spinlock and so to prevent reusing the closed handle we
mark that handle as stale in the idr, perform the callback and then
remove the handle. We set the stale handle to point to the NULL object,
then any idr_find() whilst the driver is removing the handle will return
NULL, just as if the handle is already removed from idr.

v2: Use NULL rather than an ERR_PTR to avoid having to adjust callers.
idr_alloc() tracks existing handles using an internal bitmap, so we are
free to use the NULL object as our stale identifier.
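
A condensed sketch of the resulting drm_gem_handle_delete() sequence
(using the names from the diff below):

	spin_lock(&filp->table_lock);
	obj = idr_replace(&filp->object_idr, NULL, handle);	/* mark stale */
	if (IS_ERR(obj)) {
		spin_unlock(&filp->table_lock);
		return -EINVAL;
	}
	dev = obj->dev;	/* safe: the stale slot keeps the handle reserved */
	spin_unlock(&filp->table_lock);

	drm_gem_object_release_handle(handle, obj, filp);	/* unlocked */

	spin_lock(&filp->table_lock);
	idr_remove(&filp->object_idr, handle);	/* handle now free for reuse */
	spin_unlock(&filp->table_lock);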

Signed-off-by: Chris Wilson 
Cc: dri-de...@lists.freedesktop.org
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Rob Clark 
Cc: Ville Syrjälä 
Cc: Thierry Reding 
---
 drivers/gpu/drm/drm_gem.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 2e8c77e71e1f..d1909d1a1eb4 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -294,18 +294,21 @@ drm_gem_handle_delete(struct drm_file *filp, u32 handle)
spin_lock(&filp->table_lock);
 
/* Check if we currently have a reference on the object */
-   obj = idr_find(&filp->object_idr, handle);
-   if (obj == NULL) {
+   obj = idr_replace(&filp->object_idr, NULL, handle);
+   if (IS_ERR(obj)) {
spin_unlock(&filp->table_lock);
return -EINVAL;
}
dev = obj->dev;
+   spin_unlock(&filp->table_lock);
 
/* Release reference and decrement refcount. */
+   drm_gem_object_release_handle(handle, obj, filp);
+
+   spin_lock(&filp->table_lock);
idr_remove(&filp->object_idr, handle);
spin_unlock(&filp->table_lock);
 
-   drm_gem_object_release_handle(handle, obj, filp);
return 0;
 }
 EXPORT_SYMBOL(drm_gem_handle_delete);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 006/190] drm/i915: Add GEM debugging Kconfig option

2016-01-11 Thread Chris Wilson
Currently there is a #define to enable extra BUG_ON for debugging
requests and associated activities. I want to expand its use to cover
all of GEM internals (so that we can saturate the code with asserts).
We can add a Kconfig option to make it easier to enable - with the usual
caveats of not enabling unless explicitly requested.
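
The mechanism itself is a one-line conditional macro; a sketch (the patch
leaves the disabled case expanding to nothing, whereas a do { } while (0)
body, shown here, is the more defensive variant if the macro is ever used
as a lone statement):

#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) BUG_ON(expr)
#else
#define GEM_BUG_ON(expr) do { } while (0)
#endif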

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Kconfig.debug |  8 
 drivers/gpu/drm/i915/i915_drv.h|  6 ++
 drivers/gpu/drm/i915/i915_gem.c| 12 +---
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug 
b/drivers/gpu/drm/i915/Kconfig.debug
index 1f10ee228eda..7fa6b97635e5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -10,3 +10,11 @@ config DRM_I915_WERROR
---help---
  Add -Werror to the build flags for (and only for) i915.ko.
  Do not enable this unless you are writing code for the i915.ko module.
+
+config DRM_I915_DEBUG_GEM
+   bool "Insert extra checks into the GEM internals"
+   default n
+   depends on DRM_I915_WERROR
+   ---help---
+ Enable extra sanity checks (including BUGs) that may slow the
+  system down and if hit hang the machine.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ec20814adb0c..1a6168affadd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2271,6 +2271,12 @@ struct drm_i915_gem_request {
 
 };
 
+#ifdef CONFIG_DRM_I915_DEBUG_GEM
+#define GEM_BUG_ON(expr) BUG_ON(expr)
+#else
+#define GEM_BUG_ON(expr)
+#endif
+
 int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct intel_context *ctx,
   struct drm_i915_gem_request **req_out);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fd24877eb0a0..99fd6aa4dd62 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,8 +38,6 @@
 #include 
 #include 
 
-#define RQ_BUG_ON(expr)
-
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object 
*obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object 
*obj);
 static void
@@ -1520,7 +1518,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
 
i915_gem_object_retire__read(obj, i);
}
-   RQ_BUG_ON(obj->active);
+   GEM_BUG_ON(obj->active);
}
 
return 0;
@@ -2430,8 +2428,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 static void
 i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
 {
-   RQ_BUG_ON(obj->last_write_req == NULL);
-   RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
+   GEM_BUG_ON(obj->last_write_req == NULL);
+   GEM_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
 
i915_gem_request_assign(&obj->last_write_req, NULL);
intel_fb_obj_flush(obj, true, ORIGIN_CS);
@@ -2442,8 +2440,8 @@ i915_gem_object_retire__read(struct drm_i915_gem_object 
*obj, int ring)
 {
struct i915_vma *vma;
 
-   RQ_BUG_ON(obj->last_read_req[ring] == NULL);
-   RQ_BUG_ON(!(obj->active & (1 << ring)));
+   GEM_BUG_ON(obj->last_read_req[ring] == NULL);
+   GEM_BUG_ON(!(obj->active & (1 << ring)));
 
list_del_init(&obj->ring_list[ring]);
i915_gem_request_assign(&obj->last_read_req[ring], NULL);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 007/190] drm/i915: Hide the atomic_read(reset_counter) behind a helper

2016-01-11 Thread Chris Wilson
This is principally a little bit of syntactic sugar to hide the
atomic_read()s throughout the code that retrieve the current reset_counter.
It also provides utility functions to check the reset state on an
already-read reset_counter, so that (in later patches) we can read it once
and do multiple tests rather than risk the value changing between tests.

v2: Be strict about converting existing i915_reset_in_progress() users over
to the more verbose i915_reset_in_progress_or_wedged().
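
A sketch of the read-once pattern this enables: one snapshot of the
counter, multiple tests against the same snapshot:

	u32 reset = i915_reset_counter(&dev_priv->gpu_error);

	if (__i915_terminally_wedged(reset))
		return -EIO;
	if (__i915_reset_in_progress(reset))
		return -EAGAIN;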

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h | 32 
 drivers/gpu/drm/i915/i915_gem.c | 16 
 drivers/gpu/drm/i915/i915_irq.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c| 18 +++---
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  4 ++--
 7 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index e3377abc0d4d..932af05b8eec 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4696,7 +4696,7 @@ i915_wedged_get(void *data, u64 *val)
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = dev->dev_private;
 
-   *val = atomic_read(&dev_priv->gpu_error.reset_counter);
+   *val = i915_reset_counter(&dev_priv->gpu_error);
 
return 0;
 }
@@ -4715,7 +4715,7 @@ i915_wedged_set(void *data, u64 val)
 * while it is writing to 'i915_wedged'
 */
 
-   if (i915_reset_in_progress(&dev_priv->gpu_error))
+   if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error))
return -EAGAIN;
 
intel_runtime_pm_get(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1a6168affadd..b274237726de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2983,20 +2983,44 @@ void i915_gem_retire_requests_ring(struct 
intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
  bool interruptible);
 
+static inline u32 i915_reset_counter(struct i915_gpu_error *error)
+{
+   return atomic_read(&error->reset_counter);
+}
+
+static inline bool __i915_reset_in_progress(u32 reset)
+{
+   return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
+}
+
+static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
+{
+   return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
+}
+
+static inline bool __i915_terminally_wedged(u32 reset)
+{
+   return unlikely(reset & I915_WEDGED);
+}
+
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-   return unlikely(atomic_read(&error->reset_counter)
-   & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
+   return __i915_reset_in_progress(i915_reset_counter(error));
+}
+
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error 
*error)
+{
+   return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
 }
 
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
-   return atomic_read(&error->reset_counter) & I915_WEDGED;
+   return __i915_terminally_wedged(i915_reset_counter(error));
 }
 
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
-   return ((atomic_read(&error->reset_counter) & ~I915_WEDGED) + 1) / 2;
+   return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
 }
 
 static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99fd6aa4dd62..78bf980a69bf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -83,7 +83,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
int ret;
 
-#define EXIT_COND (!i915_reset_in_progress(error) || \
+#define EXIT_COND (!i915_reset_in_progress_or_wedged(error) || \
   i915_terminally_wedged(error))
if (EXIT_COND)
return 0;
@@ -,7 +,7 @@ int
 i915_gem_check_wedge(struct i915_gpu_error *error,
 bool interruptible)
 {
-   if (i915_reset_in_progress(error)) {
+   if (i915_reset_in_progress_or_wedged(error)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
 * -EIO unconditionally for these. */
if (!interruptible)
@@ -1295,7 +1295,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
/* We need to check whether any gpu reset happened in between
 * the caller grabbing the seqno and now ... */
-   if (reset_counter != 
atomic_read(&dev_priv->gpu_error.reset_counter)) {
+   if 

[Intel-gfx] [PATCH 011/190] drm/i915: Simplify reset_counter handling during atomic modesetting

2016-01-11 Thread Chris Wilson
Now that the reset_counter is stored on the request, we can rearrange
the code that handles reading the counter versus waiting during atomic
modesetting for readability (by deleting the hairiest of the code).
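
The resulting shape, as a sketch: struct_mutex is held only around
prepare/cleanup, never across the waits on outstanding rendering
(wait_for_plane_requests() is a hypothetical stand-in for the
per-plane wait loop in the diff below):

	mutex_lock(&dev->struct_mutex);
	ret = drm_atomic_helper_prepare_planes(dev, state);
	mutex_unlock(&dev->struct_mutex);

	if (!ret && !async) {
		ret = wait_for_plane_requests(state);	/* unlocked waits */
		if (ret) {
			mutex_lock(&dev->struct_mutex);
			drm_atomic_helper_cleanup_planes(dev, state);
			mutex_unlock(&dev->struct_mutex);
		}
	}

	return ret;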

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_display.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 4f36313f31ac..ee0ec72b16b4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13504,9 +13504,9 @@ static int intel_atomic_prepare_commit(struct 
drm_device *dev,
return ret;
 
ret = drm_atomic_helper_prepare_planes(dev, state);
-   if (!ret && !async && 
!i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) {
-   mutex_unlock(&dev->struct_mutex);
+   mutex_unlock(&dev->struct_mutex);
 
+   if (!ret && !async) {
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
to_intel_plane_state(plane_state);
@@ -13520,19 +13520,15 @@ static int intel_atomic_prepare_commit(struct 
drm_device *dev,
/* Swallow -EIO errors to allow updates during hw 
lockup. */
if (ret == -EIO)
ret = 0;
-
-   if (ret)
+   if (ret) {
+   mutex_lock(&dev->struct_mutex);
+   drm_atomic_helper_cleanup_planes(dev, state);
+   mutex_unlock(&dev->struct_mutex);
break;
+   }
}
-
-   if (!ret)
-   return 0;
-
-   mutex_lock(&dev->struct_mutex);
-   drm_atomic_helper_cleanup_planes(dev, state);
}
 
-   mutex_unlock(&dev->struct_mutex);
return ret;
 }
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 005/190] drm/i915: Force clean compilation with -Werror

2016-01-11 Thread Chris Wilson
Our driver compiles clean (nowadays thanks to 0day) but for me, at least,
it would be beneficial if the compiler threw an error rather than a
warning when it found a piece of suspect code. (I use this to
compile-check patch series and want to break on the first compiler error
in order to fix the patch.)

v2: Kick off a new "Debugging" submenu for i915.ko

At this point, we applied it to the kernel and promptly kicked it out
again as it broke buildbots (due to a compiler warning on 32-bit builds):

commit 908d759b210effb33d927a8cb6603a16448474e4
Author: Daniel Vetter 
Date:   Tue May 26 07:46:21 2015 +0200

Revert "drm/i915: Force clean compilation with -Werror"

v3: Avoid enabling -Werror for allyesconfig/allmodconfig builds, using
COMPILE_TEST as a suitable proxy suggested by Andrew Morton. (Damien)
Only make the option available for EXPERT to reinforce that the option
should not be casually enabled.

Signed-off-by: Chris Wilson 
Cc: Jani Nikula 
Cc: Damien Lespiau 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/Kconfig   |  6 ++
 drivers/gpu/drm/i915/Kconfig.debug | 12 
 drivers/gpu/drm/i915/Makefile  |  2 ++
 3 files changed, 20 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/Kconfig.debug

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index b979295aab82..33e8563c2f99 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -59,3 +59,9 @@ config DRM_I915_USERPTR
  selected to enabled full userptr support.
 
  If in doubt, say "Y".
+
+menu "drm/i915 Debugging"
+depends on DRM_I915
+depends on EXPERT
+source drivers/gpu/drm/i915/Kconfig.debug
+endmenu
diff --git a/drivers/gpu/drm/i915/Kconfig.debug 
b/drivers/gpu/drm/i915/Kconfig.debug
new file mode 100644
index ..1f10ee228eda
--- /dev/null
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -0,0 +1,12 @@
+config DRM_I915_WERROR
+   bool "Force GCC to throw an error instead of a warning when compiling"
+   default n
+   # As this may inadvertently break the build, only allow the user
+   # to shoot oneself in the foot iff they aim really hard
+   depends on EXPERT
+   # We use the dependency on !COMPILE_TEST to not be enabled in
+   # allmodconfig or allyesconfig configurations
+   depends on !COMPILE_TEST
+   ---help---
+ Add -Werror to the build flags for (and only for) i915.ko.
+ Do not enable this unless you are writing code for the i915.ko module.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0851de07bd13..1e9895b9a546 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -2,6 +2,8 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
+subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+
 # Please keep these build lists sorted!
 
 # core driver code
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 017/190] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+

2016-01-11 Thread Chris Wilson
In order to ensure seqno/irq coherency, we currently read a ring register.
We are not quite sure how it works, only that it does. Experiments show
that e.g. doing a clflush(seqno) instead is not sufficient, but we can
remove the forcewake dance from the mmio access.

v2: Baytrail wants a clflush too.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 99780b674311..a1d43b2c7077 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1490,10 +1490,21 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool 
lazy_coherency)
 {
/* Workaround to force correct ordering between irq and seqno writes on
 * ivb (and maybe also on snb) by reading from a CS register (like
-* ACTHD) before reading the status page. */
+* ACTHD) before reading the status page.
+*
+* Note that this effectively stalls the read by the time
+* it takes to do a memory transaction, which more or less ensures
+* that the write from the GPU has sufficient time to invalidate
+* the CPU cacheline. Alternatively we could delay the interrupt from
+* the CS ring to give the write time to land, but that would incur
+* a delay after every batch i.e. much more frequent than a delay
+* when waiting for the interrupt (with the same net latency).
+*/
if (!lazy_coherency) {
struct drm_i915_private *dev_priv = ring->dev->dev_private;
-   POSTING_READ(RING_ACTHD(ring->mmio_base));
+   POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
+
+   intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
}
 
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 010/190] drm/i915: Store the reset counter when constructing a request

2016-01-11 Thread Chris Wilson
As the request is only valid during the same global reset epoch, we can
record the current reset_counter when constructing the request and reuse
it when waiting upon that request in future. This removes a very hairy
atomic check serialised by the struct_mutex at the time of waiting and
allows us to transfer those waits to a central dispatcher for all
waiters and all requests.

PS: With per-engine resets, we obviously cannot assume a global reset
epoch for the requests - a per-engine epoch makes the most sense. The
challenge then is how to handle checking in the waiter for when to break
the wait, as the fine-grained reset may also want to requeue the
request (i.e. the assumption that just because the epoch changes the
request is completed may be broken - or we just avoid breaking that
assumption with the fine-grained resets).
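
In sketch form — snapshot the counter at construction, compare at wait
(field and helper names as in the diff; the real wait path additionally
upgrades the -EAGAIN to -EIO when terminally wedged):

	/* at request construction */
	req->reset_counter = i915_reset_counter(&dev_priv->gpu_error);

	/* later, in __i915_wait_request() */
	if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error))
		return -EAGAIN;	/* the reset epoch moved on; wait is void */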

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 40 +++--
 drivers/gpu/drm/i915/intel_display.c|  7 +-
 drivers/gpu/drm/i915/intel_lrc.c|  7 --
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 -
 5 files changed, 15 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 60531df3844c..f74bca326b79 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2191,6 +2191,7 @@ struct drm_i915_gem_request {
/** On Which ring this request was generated */
struct drm_i915_private *i915;
struct intel_engine_cs *ring;
+   unsigned reset_counter;
 
 /** GEM sequence number associated with the previous request,
  * when the HWS breadcrumb is equal to this the GPU is processing
@@ -3050,7 +3051,6 @@ void __i915_add_request(struct drm_i915_gem_request *req,
 #define i915_add_request_no_flush(req) \
__i915_add_request(req, NULL, false)
 int __i915_wait_request(struct drm_i915_gem_request *req,
-   unsigned reset_counter,
bool interruptible,
s64 *timeout,
struct intel_rps_client *rps);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2cdd20b3aeaf..56069bdada85 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1212,7 +1212,6 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req, int state)
 /**
  * __i915_wait_request - wait until execution of request has finished
  * @req: duh!
- * @reset_counter: reset sequence associated with the given request
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time 
remaining
  *
@@ -1227,7 +1226,6 @@ static int __i915_spin_request(struct 
drm_i915_gem_request *req, int state)
  * errno with remaining time filled in timeout argument.
  */
 int __i915_wait_request(struct drm_i915_gem_request *req,
-   unsigned reset_counter,
bool interruptible,
s64 *timeout,
struct intel_rps_client *rps)
@@ -1286,7 +1284,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
/* We need to check whether any gpu reset happened in between
 * the caller grabbing the seqno and now ... */
-   if (reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
+   if (req->reset_counter != 
i915_reset_counter(&dev_priv->gpu_error)) {
/* ... but upgrade the -EAGAIN to an -EIO if the gpu
 * is truly gone. */
ret = i915_gem_check_wedge(&dev_priv->gpu_error, 
interruptible);
@@ -1459,13 +1457,7 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
-   ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-   if (ret)
-   return ret;
-
-   ret = __i915_wait_request(req,
- i915_reset_counter(&dev_priv->gpu_error),
- interruptible, NULL, NULL);
+   ret = __i915_wait_request(req, interruptible, NULL, NULL);
if (ret)
return ret;
 
@@ -1540,7 +1532,6 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_request *requests[I915_NUM_RINGS];
-   unsigned reset_counter;
int ret, i, n = 0;
 
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
@@ -1549,12 +1540,6 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (!obj->active)
return 0;
 
-   ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
-  

[Intel-gfx] [PATCH 020/190] drm/i915: Remove the lazy_coherency parameter from request-completed?

2016-01-11 Thread Chris Wilson
Now that we have split out the seqno-barrier from the
engine->get_seqno() callback itself, we can move the users of the
seqno-barrier to the required callsites, simplifying the common code and
making the required workaround handling much more explicit.
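
After the change the completion check is a pure seqno comparison, and the
waiter applies the barrier explicitly; a sketch of the
__i915_request_irq_complete() shape from the diff:

	struct intel_engine_cs *engine = req->ring;

	if (engine->irq_seqno_barrier)
		engine->irq_seqno_barrier(engine);

	if (i915_gem_request_completed(req))
		return true;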

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h  | 17 -
 drivers/gpu/drm/i915/i915_gem.c  | 24 
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_pm.c  |  4 ++--
 5 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 1499e2337e5d..d09e48455dcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -601,7 +601,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
   ring->get_seqno(ring),
-  
i915_gem_request_completed(work->flip_queued_req, true));
+  
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
seq_printf(m, "Flip queued on frame %d, (was ready on 
frame %d), now %d\n",
@@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, i) {
-   seqno[i] = ring->get_seqno(ring);
acthd[i] = intel_ring_get_active_head(ring);
+   seqno[i] = ring->get_seqno(ring);
}
 
i915_get_extra_instdone(dev, instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9762aa76bb0a..44d46018ee13 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2969,20 +2969,14 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) >= 0;
 }
 
-static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
-  bool lazy_coherency)
+static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 {
-   if (!lazy_coherency && req->ring->irq_seqno_barrier)
-   req->ring->irq_seqno_barrier(req->ring);
return i915_seqno_passed(req->ring->get_seqno(req->ring),
 req->previous_seqno);
 }
 
-static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
- bool lazy_coherency)
+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
-   if (!lazy_coherency && req->ring->irq_seqno_barrier)
-   req->ring->irq_seqno_barrier(req->ring);
return i915_seqno_passed(req->ring->get_seqno(req->ring),
 req->seqno);
 }
@@ -3636,6 +3630,8 @@ static inline void i915_trace_irq_get(struct 
intel_engine_cs *ring,
 
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
 {
+   struct intel_engine_cs *engine = req->ring;
+
/* Ensure our read of the seqno is coherent so that we
 * do not "miss an interrupt" (i.e. if this is the last
 * request and the seqno write from the GPU is not visible
@@ -3647,7 +3643,10 @@ static inline bool __i915_request_irq_complete(struct 
drm_i915_gem_request *req)
 * but it is easier and safer to do it every time the waiter
 * is woken.
 */
-   if (i915_gem_request_completed(req, false))
+   if (engine->irq_seqno_barrier)
+   engine->irq_seqno_barrier(engine);
+
+   if (i915_gem_request_completed(req))
return true;
 
/* We need to check whether any gpu reset happened in between
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b26529f1f44..d125820c6309 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1171,12 +1171,12 @@ static bool __i915_spin_request(struct 
drm_i915_gem_request *req,
 */
 
/* Only spin if we know the GPU is processing this request */
-   if (!i915_gem_request_started(req, true))
+   if (!i915_gem_request_started(req))
return false;
 
timeout = local_clock_us(&cpu) + 5;
do {
-   if (i915_gem_request_completed(req, true))
+   if (i915_gem_request_completed(req))
return true;
 
if (signal_pending_state(state, wait->task))
@@ -1228,7 +1228,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if (list_empty(&req->list)

[Intel-gfx] [PATCH 026/190] drm/i915: Stop setting wraparound seqno on initialisation

2016-01-11 Thread Chris Wilson
We have testcases to ensure that seqno wraparound works fine, so we can
forgo forcing everyone to encounter seqno wraparound during early
uptime. seqno wraparound incurs a full GPU stall, so not forcing it
eliminates one source of jitter from the early system. The testcases give
us very deterministic coverage, and given how difficult it would be to
debug an issue (GPU hang) stemming from a wraparound using pure
postmortem analysis, I see no value in forcing a wrap during boot.

Advancing the global next_seqno after a GPU reset is equally pointless.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 +---
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d125820c6309..a0744626a110 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4814,14 +4814,6 @@ i915_gem_init_hw(struct drm_device *dev)
}
}
 
-   /*
-* Increment the next seqno by 0x100 so we have a visible break
-* on re-initialisation
-*/
-   ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
-   if (ret)
-   goto out;
-
/* Now it is safe to go back round and do everything else: */
for_each_ring(ring, dev_priv, i) {
struct drm_i915_gem_request *req;
@@ -5001,13 +4993,7 @@ i915_gem_load(struct drm_device *dev)
dev_priv->num_fence_regs =
I915_READ(vgtif_reg(avail_rs.fence_num));
 
-   /*
-* Set initial sequence number for requests.
-* Using this number allows the wraparound to happen early,
-* catching any obvious problems.
-*/
-   dev_priv->next_seqno = ((u32)~0 - 0x1100);
-   dev_priv->last_seqno = ((u32)~0 - 0x1101);
+   dev_priv->next_seqno = 1;
 
/* Initialize fence registers to zero */
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 031/190] drm/i915: Harden detection of missed interrupts

2016-01-11 Thread Chris Wilson
Only declare a missed interrupt if we find that the GPU is idle with
waiters and a hangcheck interval has passed in which no new user
interrupts have been raised.
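
The hangcheck test then becomes, in sketch (report_missed_irq() is a
hypothetical stand-in for the existing DRM_ERROR-plus-fake-irq block):

	unsigned user_interrupts = READ_ONCE(ring->user_interrupts);

	if (ring_idle(ring, seqno) && intel_engine_has_waiter(ring) &&
	    ring->hangcheck.user_interrupts == user_interrupts)
		report_missed_irq(ring);

	ring->hangcheck.user_interrupts = user_interrupts;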

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  6 ++
 drivers/gpu/drm/i915/i915_irq.c | 10 --
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5a706c700684..567f8db4c70a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -735,6 +735,9 @@ static void i915_ring_seqno_info(struct seq_file *m,
seq_printf(m, "Current sequence (%s): %x\n",
   ring->name, intel_ring_get_seqno(ring));
 
+   seq_printf(m, "Current user interrupts (%s): %x\n",
+  ring->name, READ_ONCE(ring->user_interrupts));
+
spin_lock(&ring->breadcrumbs.lock);
for (rb = rb_first(&ring->breadcrumbs.waiters);
 rb != NULL;
@@ -1372,6 +1375,9 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
seq_printf(m, "\tseqno = %x [current %x], waiters? %d\n",
   ring->hangcheck.seqno, seqno[i],
   intel_engine_has_waiter(ring));
+   seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+  ring->hangcheck.user_interrupts,
+  ring->user_interrupts);
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
   (long long)ring->hangcheck.acthd,
   (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bf48fa63127a..b3942dec7de4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,8 +997,10 @@ static void ironlake_rps_change_irq_handler(struct 
drm_device *dev)
 static void notify_ring(struct intel_engine_cs *ring)
 {
ring->irq_posted = true; /* paired with mb() in wake_up_process() */
-   if (intel_engine_wakeup(ring))
+   if (intel_engine_wakeup(ring)) {
trace_i915_gem_request_notify(ring);
+   ring->user_interrupts++;
+   }
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3061,12 +3063,14 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
for_each_ring(ring, dev_priv, i) {
u64 acthd;
u32 seqno;
+   unsigned user_interrupts;
bool busy = true;
 
semaphore_clear_deadlocks(dev_priv);
 
acthd = intel_ring_get_active_head(ring);
seqno = intel_ring_get_seqno(ring);
+   user_interrupts = READ_ONCE(ring->user_interrupts);
 
if (ring->hangcheck.seqno == seqno) {
if (ring_idle(ring, seqno)) {
@@ -3074,7 +3078,8 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
 
if (intel_engine_has_waiter(ring)) {
/* Issue a wake-up to catch stuck h/w. 
*/
-   if (!test_and_set_bit(ring->id, 
&dev_priv->gpu_error.missed_irq_rings)) {
+   if (ring->hangcheck.user_interrupts == 
user_interrupts &&
+   !test_and_set_bit(ring->id, 
&dev_priv->gpu_error.missed_irq_rings)) {
if (!test_bit(ring->id, 
&dev_priv->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck 
timer elapsed... %s idle\n",
  ring->name);
@@ -3142,6 +3147,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
 
ring->hangcheck.seqno = seqno;
ring->hangcheck.acthd = acthd;
+   ring->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3364bcebd456..73da75fa47c1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
u64 acthd;
u64 max_acthd;
u32 seqno;
+   unsigned user_interrupts;
int score;
enum intel_ring_hangcheck_action action;
int deadlock;
@@ -328,6 +329,7 @@ struct  intel_engine_cs {
 * inspecting request list.
 */
u32 last_submitted_seqno;
+   unsigned user_interrupts;
 
bool gpu_caches_dirty;
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 013/190] drm/i915: Suppress error message when GPU resets are disabled

2016-01-11 Thread Chris Wilson
If we do not have low-level support for resetting the GPU, or if the user
has explicitly disabled resetting the device, the failure is expected.
Since it is an expected failure, we should be using a lower priority
message than *ERROR*, perhaps NOTICE. In the absence of DRM_NOTICE, just
emit the expected failure as a DEBUG message.

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2f03379cdb4b..5160f1414de4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -910,7 +910,10 @@ int i915_reset(struct drm_device *dev)
pr_notice("drm/i915: Resetting chip after gpu hang\n");
 
if (ret) {
-   DRM_ERROR("Failed to reset chip: %i\n", ret);
+   if (ret != -ENODEV)
+   DRM_ERROR("Failed to reset chip: %i\n", ret);
+   else
+   DRM_DEBUG_DRIVER("GPU reset disabled\n");
goto error;
}
 
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 041/190] drm/i915: Allow userspace to request no-error-capture upon GPU hangs

2016-01-11 Thread Chris Wilson
igt likes to inject GPU hangs into its command streams. However, as we
expect these hangs, we don't actually want them recorded in the dmesg
output or stored in the i915_error_state (usually). To accommodate this,
allow userspace to set a flag on the context so that any hang emanating
from that context will not be recorded. We still do the error capture
(otherwise how do we find the guilty context and know its intent?), as
part of the reason for random GPU hang injection is to exercise the race
conditions between the error capture and normal execution.

v2: Split out the request->ringbuf error capture changes.
v3: Move the flag defines next to the intel_context->flags definition
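
From userspace the flag is toggled through the context setparam ioctl; a
minimal sketch using the uapi names added by this patch:

	struct drm_i915_gem_context_param p = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE,
		.value = 1,	/* hangs from this context leave no error state */
	};

	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);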

Signed-off-by: Chris Wilson 
Acked-by: Daniel Vetter 
Reviewed-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_drv.h |  7 +--
 drivers/gpu/drm/i915/i915_gem_context.c | 13 +
 drivers/gpu/drm/i915/i915_gpu_error.c   | 14 +-
 include/uapi/drm/i915_drm.h |  1 +
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3b795f1566b..57e450e25ad6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -486,6 +486,7 @@ struct drm_i915_error_state {
struct timeval time;
 
char error_msg[128];
+   bool simulated;
int iommu;
u32 reset_count;
u32 suspend_count;
@@ -842,7 +843,6 @@ struct i915_ctx_hang_stats {
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_HANDLE 0
 
-#define CONTEXT_NO_ZEROMAP (1<<0)
 /**
  * struct intel_context - as the name implies, represents a context.
  * @ref: reference count.
@@ -867,11 +867,14 @@ struct intel_context {
int user_handle;
uint8_t remap_slice;
struct drm_i915_private *i915;
-   int flags;
struct drm_i915_file_private *file_priv;
struct i915_ctx_hang_stats hang_stats;
struct i915_hw_ppgtt *ppgtt;
 
+   unsigned flags;
+#define CONTEXT_NO_ZEROMAP (1<<0)
+#define CONTEXT_NO_ERROR_CAPTURE   (1<<1)
+
/* Legacy ring buffer submission */
struct {
struct drm_i915_gem_object *rcs_state;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index e5e9a8918f19..0aea5ccf6d68 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -939,6 +939,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, 
void *data,
else
args->value = to_i915(dev)->gtt.base.total;
break;
+   case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+   args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
+   break;
default:
ret = -EINVAL;
break;
@@ -984,6 +987,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device 
*dev, void *data,
ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
}
break;
+   case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+   if (args->size) {
+   ret = -EINVAL;
+   } else {
+   if (args->value)
+   ctx->flags |= CONTEXT_NO_ERROR_CAPTURE;
+   else
+   ctx->flags &= ~CONTEXT_NO_ERROR_CAPTURE;
+   }
+   break;
default:
ret = -EINVAL;
break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 93da2c7581f6..4f17d6847569 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1040,6 +1040,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
rcu_read_unlock();
}
 
+   error->simulated |= request->ctx->flags & 
CONTEXT_NO_ERROR_CAPTURE;
+
rb = request->ringbuf;
error->ring[i].cpu_ring_head = rb->head;
error->ring[i].cpu_ring_tail = rb->tail;
@@ -1333,12 +1335,14 @@ void i915_capture_error_state(struct drm_device *dev, 
bool wedged,
i915_error_capture_msg(dev, error, wedged, error_msg);
DRM_INFO("%s\n", error->error_msg);
 
-   spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-   if (dev_priv->gpu_error.first_error == NULL) {
-   dev_priv->gpu_error.first_error = error;
-   error = NULL;
+   if (!error->simulated) {
+   spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+   if (dev_priv->gpu_error.first_error == NULL) {
+   dev_priv->gpu_error.first_error = error;
+   error = NULL;
+   }
+   spin_unlock_irqrestore(&dev_priv->gpu

[Intel-gfx] [PATCH 032/190] drm/i915: Remove debug noise on detecting fault-injection of missed interrupts

2016-01-11 Thread Chris Wilson
Since the tests can and do explicitly check debugfs/i915_ring_missed_irqs
for the handling of a "missed interrupt", adding it to the dmesg at INFO
is just noise. When it happens for real, we still class it as an ERROR.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b3942dec7de4..502663f13cd8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3083,9 +3083,6 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
if (!test_bit(ring->id, 
&dev_priv->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck 
timer elapsed... %s idle\n",
  ring->name);
-   else
-   DRM_INFO("Fake missed 
irq on %s\n",
-ring->name);
 

intel_engine_enable_fake_irq(ring);
}
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 030/190] drm/i915: Move the get/put irq locking into the caller

2016-01-11 Thread Chris Wilson
With only a single callsite for intel_engine_cs->irq_get and ->irq_put,
we can reduce the code size by moving the common preamble into the
caller, and we can also eliminate the reference counting.

For completeness, as we are no longer doing reference counting on irq,
rename the get/put vfunctions to enable/disable respectively.
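
The wrappers in intel_breadcrumbs.c then carry the whole preamble; a
sketch of the enable side (per the diff below):

static void irq_enable(struct intel_engine_cs *engine)
{
	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_enable(engine);	/* vfunc: unmask the ring in IMR */
	spin_unlock_irq(&engine->i915->irq_lock);
}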

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c |   8 +-
 drivers/gpu/drm/i915/intel_lrc.c |  53 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 302 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   5 +-
 4 files changed, 125 insertions(+), 243 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index cf9cbcc2d5d7..0ea01bd6811c 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -51,12 +51,16 @@ static void irq_enable(struct intel_engine_cs *engine)
 */
engine->irq_posted = true;
 
-   WARN_ON(!engine->irq_get(engine));
+   spin_lock_irq(&engine->i915->irq_lock);
+   engine->irq_enable(engine);
+   spin_unlock_irq(&engine->i915->irq_lock);
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
-   engine->irq_put(engine);
+   spin_lock_irq(&engine->i915->irq_lock);
+   engine->irq_disable(engine);
+   spin_unlock_irq(&engine->i915->irq_lock);
 
engine->irq_posted = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 27d91f1ceb2b..b1ede2e9b372 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1640,37 +1640,20 @@ static int gen8_emit_bb_start(struct 
drm_i915_gem_request *req,
return 0;
 }
 
-static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
+static void gen8_logical_ring_enable_irq(struct intel_engine_cs *ring)
 {
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   unsigned long flags;
-
-   if (WARN_ON(!intel_irqs_enabled(dev_priv)))
-   return false;
-
-   spin_lock_irqsave(&dev_priv->irq_lock, flags);
-   if (ring->irq_refcount++ == 0) {
-   I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | 
ring->irq_keep_mask));
-   POSTING_READ(RING_IMR(ring->mmio_base));
-   }
-   spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+   struct drm_i915_private *dev_priv = ring->i915;
 
-   return true;
+   I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
+   POSTING_READ(RING_IMR(ring->mmio_base));
 }
 
-static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
+static void gen8_logical_ring_disable_irq(struct intel_engine_cs *ring)
 {
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   unsigned long flags;
+   struct drm_i915_private *dev_priv = ring->i915;
 
-   spin_lock_irqsave(&dev_priv->irq_lock, flags);
-   if (--ring->irq_refcount == 0) {
-   I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
-   POSTING_READ(RING_IMR(ring->mmio_base));
-   }
-   spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+   I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
+   POSTING_READ(RING_IMR(ring->mmio_base));
 }
 
 static int gen8_emit_flush(struct drm_i915_gem_request *request,
@@ -1993,8 +1976,8 @@ static int logical_render_ring_init(struct drm_device 
*dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush_render;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
 
ring->dev = dev;
@@ -2039,8 +2022,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
 
return logical_ring_init(dev, ring);
@@ -2063,8 +2046,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
-   ring->irq_get = gen8_logical_ring_get_irq;
-   ring->irq_put = gen8_logical_ring_put_irq;
+   ring->irq_enable = gen8_logical_ring_enable_irq;
+   ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = 

[Intel-gfx] [PATCH 023/190] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted

2016-01-11 Thread Chris Wilson
If we flag the seqno as potentially stale upon receiving an interrupt,
we can use that information to reduce the frequency that we apply the
heavyweight coherent seqno read (i.e. if we wake up a chain of waiters).
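
The ordering that makes this safe, in sketch: clear irq_posted before the
barrier, so that a racing interrupt reasserts it and rewakes the waiter:

	if (engine->irq_seqno_barrier && READ_ONCE(engine->irq_posted)) {
		WRITE_ONCE(engine->irq_posted, false);	/* clear first */
		engine->irq_seqno_barrier(engine);	/* then read coherently */
		if (i915_gem_request_completed(req))
			return true;
	}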

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  | 15 ++-
 drivers/gpu/drm/i915/i915_irq.c  |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  8 
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 +
 4 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c2ee8efdd928..8940b8d3fa59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3649,7 +3649,20 @@ static inline bool __i915_request_irq_complete(struct 
drm_i915_gem_request *req)
 * but it is easier and safer to do it every time the waiter
 * is woken.
 */
-   if (engine->irq_seqno_barrier) {
+   if (engine->irq_seqno_barrier && READ_ONCE(engine->irq_posted)) {
+   /* The ordering of irq_posted versus applying the barrier
+* is crucial. The clearing of the current irq_posted must
+* be visible before we perform the barrier operation,
+* such that if a subsequent interrupt arrives, irq_posted
+* is reasserted and our task rewoken (which causes us to
+* do another __i915_request_irq_complete() immediately
+* and reapply the barrier). Conversely, if the clear
+* occurs after the barrier, then an interrupt that arrived
+* whilst we waited on the barrier would not trigger a
+* barrier on the next pass, and the read may not see the
+* seqno update.
+*/
+   WRITE_ONCE(engine->irq_posted, false);
engine->irq_seqno_barrier(engine);
if (i915_gem_request_completed(req))
return true;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 627c7fb6aa9b..738edd7fbf8d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
return;
 
trace_i915_gem_request_notify(ring);
+   ring->irq_posted = true; /* paired with mb() in wake_up_process() */
intel_engine_wakeup(ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index f66acf820c40..d689bd61534e 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -43,12 +43,20 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+   /* Enabling the IRQ may miss the generation of the interrupt, but
+* we still need to force the barrier before reading the seqno,
+* just in case.
+*/
+   engine->irq_posted = true;
+
WARN_ON(!engine->irq_get(engine));
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
engine->irq_put(engine);
+
+   engine->irq_posted = false;
 }
 
 static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 28ab07b38c05..6cc8e9c5f8d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -198,6 +198,7 @@ struct  intel_engine_cs {
struct i915_ctx_workarounds wa_ctx;
 
unsigned irq_refcount; /* protected by dev_priv->irq_lock */
+   boolirq_posted;
u32 irq_enable_mask;/* bitmask to enable ring 
interrupt */
struct drm_i915_gem_request *trace_irq_req;
bool __must_check (*irq_get)(struct intel_engine_cs *ring);
-- 
2.7.0.rc3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 012/190] drm/i915: Prevent leaking of -EIO from i915_wait_request()

2016-01-11 Thread Chris Wilson
Reporting -EIO from i915_wait_request() has proven very troublesome
over the years, with numerous hard-to-reproduce bugs cropping up in the
corner case where a reset occurs and the code wasn't expecting such
an error.

If we reset the GPU, or have detected a hang and wish to reset the
GPU, the request is forcibly completed and the wait broken. Currently, we
report either -EAGAIN or -EIO in order for the caller to retreat and
restart the wait (if appropriate) after dropping and then reacquiring
the struct_mutex (essential to allow the GPU reset to proceed). However,
if we take the view that the request is complete (no further work will
be done on it by the GPU because it is dead and soon to be reset), then
we can proceed with the task at hand and then drop the struct_mutex
allowing the reset to occur. This transfers the burden of checking
whether it is safe to proceed to the caller, which in all but one
instance it is safe - completely eliminating the source of all spurious
-EIO.

Of note, we only have two API entry points where we expect that
userspace can observe an EIO. First is when submitting an execbuf, if
the GPU is terminally wedged, then the operation cannot succeed and an
-EIO is reported. Secondly, existing userspace uses the throttle ioctl
to detect an already wedged GPU before starting using HW acceleration
(or to confirm that the GPU is wedged after an error condition). So if
the GPU is wedged when the user calls throttle, also report -EIO.

v2: Split more carefully the change to i915_wait_request() and assorted
ABI from the reset handling.
v3: Add a couple of WARN_ON(EIO) to the interruptible modesetting code
so that we don't start to leak EIO there in future (and break our hang
resistant modesetting).
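
The two deliberate -EIO sources share one check; a sketch of the new
static helper (per the diff below, which this archive truncates):

static int
i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
{
	if (__i915_terminally_wedged(reset_counter))
		return -EIO;

	/* Non-interruptible callers can't handle -EAGAIN */
	if (__i915_reset_in_progress(reset_counter))
		return interruptible ? -EAGAIN : -EIO;

	return 0;
}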

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/i915_gem.c | 44 -
 drivers/gpu/drm/i915/i915_gem_userptr.c |  6 ++---
 drivers/gpu/drm/i915/intel_display.c| 13 +-
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  2 +-
 6 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f74bca326b79..bbdb056d2a8e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2978,8 +2978,6 @@ i915_gem_find_active_request(struct intel_engine_cs 
*ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
-int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
- bool interruptible);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 56069bdada85..f570990f03e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -206,11 +206,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object 
*obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
 
ret = i915_gem_object_set_to_cpu_domain(obj, true);
-   if (ret) {
+   if (WARN_ON(ret)) {
/* In the event of a disaster, abandon all caches and
 * hope for the best.
 */
-   WARN_ON(ret != -EIO);
obj->base.read_domains = obj->base.write_domain = 
I915_GEM_DOMAIN_CPU;
}
 
@@ -1104,15 +1103,13 @@ put_rpm:
return ret;
 }
 
-int
-i915_gem_check_wedge(struct i915_gpu_error *error,
-bool interruptible)
+static int
+i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 {
-   if (i915_reset_in_progress_or_wedged(error)) {
-   /* Recovery complete, but the reset failed ... */
-   if (i915_terminally_wedged(error))
-   return -EIO;
+   if (__i915_terminally_wedged(reset_counter))
+   return -EIO;
 
+   if (__i915_reset_in_progress(reset_counter)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
 * -EIO unconditionally for these. */
if (!interruptible)
@@ -1283,13 +1280,14 @@ int __i915_wait_request(struct drm_i915_gem_request 
*req,
prepare_to_wait(&ring->irq_queue, &wait, state);
 
/* We need to check whether any gpu reset happened in between
-* the caller grabbing the seqno and now ... */
+* the request being submitted and now. If a reset has occurred,
+* the request is effectively complete (we either are in the
+* process of or have discarded the rendering and completely
+* reset the GPU. The results of the request are lost and we
+* are free to continue on with the original operation.
+*/

[Intel-gfx] [PATCH 019/190] drm/i915: Separate out the seqno-barrier from engine->get_seqno

2016-01-11 Thread Chris Wilson
In order to simplify the next couple of patches, extract the
lazy_coherency optimisation out of the engine->get_seqno() vfunc into
its own callback.

v2: Rename the barrier to engine->irq_seqno_barrier to try and better
reflect that the barrier is only required after the user interrupt before
reading the seqno (to ensure that the seqno update lands in time as we
do not have strict seqno-irq ordering on all platforms).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  6 ++---
 drivers/gpu/drm/i915/i915_drv.h  | 12 ++
 drivers/gpu/drm/i915/i915_gpu_error.c|  2 +-
 drivers/gpu/drm/i915/i915_irq.c  |  4 ++--
 drivers/gpu/drm/i915/i915_trace.h|  2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c | 39 
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 36 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 ++--
 9 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 9396597b136d..1499e2337e5d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   ring->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  ring->get_seqno(ring, true),
+  ring->get_seqno(ring),
   
i915_gem_request_completed(work->flip_queued_req, true));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
@@ -734,7 +734,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 
if (ring->get_seqno) {
seq_printf(m, "Current sequence (%s): %x\n",
-  ring->name, ring->get_seqno(ring, false));
+  ring->name, ring->get_seqno(ring));
}
 
spin_lock(&ring->breadcrumbs.lock);
@@ -1354,7 +1354,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, i) {
-   seqno[i] = ring->get_seqno(ring, false);
+   seqno[i] = ring->get_seqno(ring);
acthd[i] = intel_ring_get_active_head(ring);
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a9e8de57e848..9762aa76bb0a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2972,15 +2972,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
   bool lazy_coherency)
 {
-   u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-   return i915_seqno_passed(seqno, req->previous_seqno);
+   if (!lazy_coherency && req->ring->irq_seqno_barrier)
+   req->ring->irq_seqno_barrier(req->ring);
+   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+req->previous_seqno);
 }
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
  bool lazy_coherency)
 {
-   u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
-   return i915_seqno_passed(seqno, req->seqno);
+   if (!lazy_coherency && req->ring->irq_seqno_barrier)
+   req->ring->irq_seqno_barrier(req->ring);
+   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+req->seqno);
 }
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index f805d117f3d1..01d0206ca4dd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -902,8 +902,8 @@ static void i915_record_ring_state(struct drm_device *dev,
 
ering->waiting = intel_engine_has_waiter(ring);
ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
-   ering->seqno = ring->get_seqno(ring, false);
ering->acthd = intel_ring_get_active_head(ring);
+   ering->seqno = ring->get_seqno(ring);
ering->start = I915_READ_START(ring);
ering->head = I915_READ_HEAD(ring);
ering->tail = I915_READ_TAIL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 95b997a57da8..d73669783045 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
if (signaller->hangc

[Intel-gfx] [PATCH 036/190] drm/i915: Restore waitboost credit to the synchronous waiter

2016-01-11 Thread Chris Wilson
Ideally, we want to automagically have the GPU respond to the
instantaneous load by reclocking itself. However, reclocking occurs
relatively slowly, and to the client waiting for a result from the GPU,
too late. To compensate and reduce the client latency, we allow the
first wait from a client to boost the GPU clocks to maximum. This
overcomes the lag in autoreclocking, at the expense of forcing the GPU
clocks too high. So to offset the excessive power usage, we currently
allow a client to only boost the clocks once before we detect the GPU
is idle again. This works reasonably for say the first frame in a
benchmark, but for many more synchronous workloads (like OpenCL) we find
the GPU clocks remain too low. By noting a wait which would idle the GPU
(i.e. we just waited upon the last known request), we can give that
client the idle boost credit (for their next wait) without the 100ms
delay required for us to detect the GPU idle state. The intention is to
boost clients that are stalling in the process of feeding the GPU more
work (and who in doing so let the GPU idle), without granting boost
credits to clients that are throttling themselves (such as compositors).

Signed-off-by: Chris Wilson 
Cc: "Zou, Nanhai" 
Cc: Jesse Barnes 
Reviewed-by: Jesse Barnes 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e9f5ca7ea835..3fea582768e9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1314,6 +1314,22 @@ complete:
*timeout = 0;
}
 
+   if (ret == 0 && rps && req->seqno == req->ring->last_submitted_seqno) {
+   /* The GPU is now idle and this client has stalled.
+* Since no other client has submitted a request in the
+* meantime, assume that this client is the only one
+* supplying work to the GPU but is unable to keep that
+* work supplied because it is waiting. Since the GPU is
+* then never kept fully busy, RPS autoclocking will
+* keep the clocks relatively low, causing further delays.
+* Compensate by giving the synchronous client credit for
+* a waitboost next time.
+*/
+   spin_lock(&req->i915->rps.client_lock);
+   list_del_init(&rps->link);
+   spin_unlock(&req->i915->rps.client_lock);
+   }
+
return ret;
 }
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 050/190] drm/i915: Refactor duplicate object vmap functions

2016-01-11 Thread Chris Wilson
We now have two implementations for vmapping a whole object, one for
dma-buf and one for the ringbuffer. If we couple the vmapping into the
obj->pages lifetime, then we can reuse an obj->vmapping for both and at
the same time couple it into the shrinker.

v2: Mark the failable kmalloc() as __GFP_NOWARN (vsyrjala)
v3: Call unpin_vmap from the right dmabuf unmapper
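
For illustration, the intended caller pattern looks roughly like this
(a sketch; obj, data and len are placeholders):

    void *ptr = i915_gem_object_pin_vmap(obj);
    if (IS_ERR(ptr))
        return PTR_ERR(ptr);

    memcpy(ptr, data, len); /* use the contiguous kernel mapping */

    /* Drops the pages pin; the vmapping stays cached on the object
     * until its backing pages are finally released.
     */
    i915_gem_object_unpin_vmap(obj);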

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h | 12 +---
 drivers/gpu/drm/i915/i915_gem.c | 41 +
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  | 53 -
 drivers/gpu/drm/i915/intel_ringbuffer.c | 53 ++---
 4 files changed, 71 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49a151126b2a..56cf2ffc1eac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2114,10 +2114,7 @@ struct drm_i915_gem_object {
struct scatterlist *sg;
int last;
} get_page;
-
-   /* prime dma-buf support */
-   void *dma_buf_vmapping;
-   int vmapping_count;
+   void *vmapping;
 
/** Breadcrumb of last rendering to the buffer.
 * There can only be one writer, but we allow for multiple readers.
@@ -2774,12 +2771,19 @@ static inline void i915_gem_object_pin_pages(struct 
drm_i915_gem_object *obj)
BUG_ON(obj->pages == NULL);
obj->pages_pin_count++;
 }
+
 static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
BUG_ON(obj->pages_pin_count == 0);
obj->pages_pin_count--;
 }
 
+void *__must_check i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj);
+static inline void i915_gem_object_unpin_vmap(struct drm_i915_gem_object *obj)
+{
+   i915_gem_object_unpin_pages(obj);
+}
+
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 struct intel_engine_cs *to,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9df00e694cd9..2912e8714f5b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1854,6 +1854,11 @@ i915_gem_object_put_pages(struct drm_i915_gem_object 
*obj)
ops->put_pages(obj);
obj->pages = NULL;
 
+   if (obj->vmapping) {
+   vunmap(obj->vmapping);
+   obj->vmapping = NULL;
+   }
+
i915_gem_object_invalidate(obj);
 
return 0;
@@ -2019,6 +2024,42 @@ i915_gem_object_get_pages(struct drm_i915_gem_object 
*obj)
return 0;
 }
 
+void *i915_gem_object_pin_vmap(struct drm_i915_gem_object *obj)
+{
+   int ret;
+
+   ret = i915_gem_object_get_pages(obj);
+   if (ret)
+   return ERR_PTR(ret);
+
+   i915_gem_object_pin_pages(obj);
+
+   if (obj->vmapping == NULL) {
+   struct sg_page_iter sg_iter;
+   struct page **pages;
+   int n;
+
+   n = obj->base.size >> PAGE_SHIFT;
+   pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);
+   if (pages == NULL)
+   pages = drm_malloc_ab(n, sizeof(*pages));
+   if (pages != NULL) {
+   n = 0;
+   for_each_sg_page(obj->pages->sgl, &sg_iter, 
obj->pages->nents, 0)
+   pages[n++] = sg_page_iter_page(&sg_iter);
+
+   obj->vmapping = vmap(pages, n, 0, PAGE_KERNEL);
+   drm_free_large(pages);
+   }
+   if (obj->vmapping == NULL) {
+   i915_gem_object_unpin_pages(obj);
+   return ERR_PTR(-ENOMEM);
+   }
+   }
+
+   return obj->vmapping;
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index e9c2bfd85b52..8894648acee0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -95,14 +95,12 @@ static void i915_gem_unmap_dma_buf(struct 
dma_buf_attachment *attachment,
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
 
-   mutex_lock(&obj->base.dev->struct_mutex);
-
dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
sg_free_table(sg);
kfree(sg);
 
+   mutex_lock(&obj->base.dev->struct_mutex);
i915_gem_object_unpin_pages(obj);
-
mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -110,51 +108,17 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
-   struct sg_page_iter sg_iter;
-   struct page **pages;
-   int ret, i;

[Intel-gfx] [PATCH 048/190] drm/i915: Disable waitboosting for fence_wait()

2016-01-11 Thread Chris Wilson
We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive power
wasting. For fence_wait() we do not have any client tracking and so that
leaves it open to abuse.
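
The opt-out is expressed with an ERR_PTR sentinel so that the existing
NULL case (an anonymous wait that may still be boosted) is preserved;
a sketch of the idiom as used in the hunks below:

    #define NO_WAITBOOST ERR_PTR(-1)

    /* in __i915_wait_request(): */
    if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
        gen6_rps_boost(req->i915, rps, req->emitted_jiffies);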

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c | 6 +++---
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index a796dbd1b0e4..01893d847dfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -68,7 +68,7 @@ static signed long i915_fence_wait(struct fence *fence,
 
ret = __i915_wait_request(to_i915_request(fence),
  interruptible, timeout,
- NULL);
+ NO_WAITBOOST);
if (ret == -ETIME)
return 0;
 
@@ -621,7 +621,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 * forcing the clocks too high for the whole system, we only allow
 * each client to waitboost once in a busy period.
 */
-   if (INTEL_INFO(req->i915)->gen >= 6)
+   if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
intel_wait_init(&wait, req->fence.seqno);
@@ -691,7 +691,7 @@ complete:
*timeout = 0;
}
 
-   if (ret == 0 && rps &&
+   if (ret == 0 && !IS_ERR_OR_NULL(rps) &&
req->fence.seqno == req->ring->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
 * Since no other client has submitted a request in the
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index 0ab14fd0fce0..6b3de827929a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -179,6 +179,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
__i915_add_request(req, NULL, false)
 
 struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
 
 int __i915_wait_request(struct drm_i915_gem_request *req,
bool interruptible,
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 015/190] drm/i915: Remove the dedicated hangcheck workqueue

2016-01-11 Thread Chris Wilson
The queue only ever contains at most one item and has no special flags.
It is just a very simple wrapper around the system-wq - a complication
with no benefits.
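
The replacement is the system workqueue, which is sufficient for a
single self-rearming work item; the before/after shape (matching the
i915_irq.c hunk below):

    /* before: a dedicated ordered workqueue holding one item */
    queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work, delay);

    /* after: the system workqueue suffices */
    schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);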

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c | 11 ---
 drivers/gpu/drm/i915/i915_drv.h |  1 -
 drivers/gpu/drm/i915/i915_irq.c |  6 +++---
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 44a896ce32e6..9e49e304dd8e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1016,14 +1016,6 @@ int i915_driver_load(struct drm_device *dev, unsigned 
long flags)
goto out_freewq;
}
 
-   dev_priv->gpu_error.hangcheck_wq =
-   alloc_ordered_workqueue("i915-hangcheck", 0);
-   if (dev_priv->gpu_error.hangcheck_wq == NULL) {
-   DRM_ERROR("Failed to create our hangcheck workqueue.\n");
-   ret = -ENOMEM;
-   goto out_freedpwq;
-   }
-
intel_irq_init(dev_priv);
intel_uncore_sanitize(dev);
 
@@ -1105,8 +1097,6 @@ out_gem_unload:
intel_teardown_gmbus(dev);
intel_teardown_mchbar(dev);
pm_qos_remove_request(&dev_priv->pm_qos);
-   destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
-out_freedpwq:
destroy_workqueue(dev_priv->hotplug.dp_wq);
 out_freewq:
destroy_workqueue(dev_priv->wq);
@@ -1209,7 +1199,6 @@ int i915_driver_unload(struct drm_device *dev)
 
destroy_workqueue(dev_priv->hotplug.dp_wq);
destroy_workqueue(dev_priv->wq);
-   destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
pm_qos_remove_request(&dev_priv->pm_qos);
 
i915_global_gtt_cleanup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d9d411919779..188bed933f11 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1330,7 +1330,6 @@ struct i915_gpu_error {
/* Hang gpu twice in this window and your context gets banned */
 #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
 
-   struct workqueue_struct *hangcheck_wq;
struct delayed_work hangcheck_work;
 
/* For reset and error_state handling. */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 94f5f4e99446..8939438d747d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3175,7 +3175,7 @@ out:
 
 void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 {
-   struct i915_gpu_error *e = &dev_priv->gpu_error;
+   unsigned long delay;
 
if (!i915.enable_hangcheck)
return;
@@ -3185,8 +3185,8 @@ void i915_queue_hangcheck(struct drm_i915_private 
*dev_priv)
 * we will ignore a hung ring if a second ring is kept busy.
 */
 
-   queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work,
-  
round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
+   delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
+   schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);
 }
 
 static void ibx_irq_reset(struct drm_device *dev)
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 045/190] drm/i915: Move releasing of the GEM request from free to retire/cancel

2016-01-11 Thread Chris Wilson
If we move the release of the GEM request (i.e. decoupling it from the
various lists used for client and context tracking) after it is complete
(either by the GPU retiring the request, or by the caller cancelling the
request), we can remove the requirement that the final unreference of
the GEM request need to be under the struct_mutex.

v2: Execlists as always is badly asymmetric and year-old patches still
haven't landed to fix it up.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_request.c  | 50 ++--
 drivers/gpu/drm/i915/i915_gem_request.h  | 14 -
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
 drivers/gpu/drm/i915/intel_display.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c |  6 ++--
 drivers/gpu/drm/i915/intel_pm.c  |  2 +-
 7 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 68a25617ca7a..6d8d65304abf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
ret = __i915_wait_request(req[i], true,
  args->timeout_ns > 0 ? 
&args->timeout_ns : NULL,
  to_rps_client(file));
-   i915_gem_request_unreference__unlocked(req[i]);
+   i915_gem_request_unreference(req[i]);
}
return ret;
 
@@ -3505,7 +3505,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct 
drm_file *file)
return 0;
 
ret = __i915_wait_request(target, true, NULL, NULL);
-   i915_gem_request_unreference__unlocked(target);
+   i915_gem_request_unreference(target);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index b4ede6dd7b20..1c4f4d83a3c2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -184,13 +184,6 @@ err:
return ret;
 }
 
-void i915_gem_request_cancel(struct drm_i915_gem_request *req)
-{
-   intel_ring_reserved_space_cancel(req->ringbuf);
-
-   i915_gem_request_unreference(req);
-}
-
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
   struct drm_file *file)
 {
@@ -235,9 +228,28 @@ i915_gem_request_remove_from_client(struct 
drm_i915_gem_request *request)
request->pid = NULL;
 }
 
+static void __i915_gem_request_release(struct drm_i915_gem_request *request)
+{
+   i915_gem_request_remove_from_client(request);
+
+   i915_gem_context_unreference(request->ctx);
+   i915_gem_request_unreference(request);
+}
+
+void i915_gem_request_cancel(struct drm_i915_gem_request *req)
+{
+   intel_ring_reserved_space_cancel(req->ringbuf);
+   if (i915.enable_execlists) {
+   if (req->ctx != req->ring->default_context)
+   intel_lr_context_unpin(req);
+   }
+   __i915_gem_request_release(req);
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
+   list_del_init(&request->list);
 
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
@@ -248,11 +260,7 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 * completion order.
 */
request->ringbuf->last_retired_head = request->postfix;
-
-   list_del_init(&request->list);
-   i915_gem_request_remove_from_client(request);
-
-   i915_gem_request_unreference(request);
+   __i915_gem_request_release(request);
 }
 
 void
@@ -639,21 +647,7 @@ i915_wait_request(struct drm_i915_gem_request *req)
 
 void i915_gem_request_free(struct kref *req_ref)
 {
-   struct drm_i915_gem_request *req = container_of(req_ref,
-typeof(*req), ref);
-   struct intel_context *ctx = req->ctx;
-
-   if (req->file_priv)
-   i915_gem_request_remove_from_client(req);
-
-   if (ctx) {
-   if (i915.enable_execlists) {
-   if (ctx != req->ring->default_context)
-   intel_lr_context_unpin(req);
-   }
-
-   i915_gem_context_unreference(ctx);
-   }
-
+   struct drm_i915_gem_request *req =
+   container_of(req_ref, typeof(*req), ref);
kmem_cache_free(req->i915->requests, req);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index d46f22f30b0a..af1b825fce50 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -154,23 +154,9 @@ i915_gem_request_reference(struct drm_i915_g

[Intel-gfx] [PATCH 059/190] drm/i915: Rename request->ringbuf to request->ring

2016-01-11 Thread Chris Wilson
Now that we have disambiguated ring and engine, we can use the clearer
and more consistent name for the intel_ringbuffer pointer in the
request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|   8 +-
 drivers/gpu/drm/i915/i915_gem_context.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c|   6 +-
 drivers/gpu/drm/i915/i915_gem_request.c|  20 ++--
 drivers/gpu/drm/i915/i915_gem_request.h|   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  31 +++---
 drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/intel_display.c   |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 152 ++---
 drivers/gpu/drm/i915/intel_mocs.c  |  34 +++
 drivers/gpu/drm/i915/intel_overlay.c   |  42 
 drivers/gpu/drm/i915/intel_ringbuffer.c|  86 
 13 files changed, 198 insertions(+), 203 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6622c9bb3af8..430c439ece26 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4083,11 +4083,11 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, 
int slice)
 * at initialization time.
 */
for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
-   intel_ring_emit(req->ringbuf, MI_LOAD_REGISTER_IMM(1));
-   intel_ring_emit_reg(req->ringbuf, GEN7_L3LOG(slice, i));
-   intel_ring_emit(req->ringbuf, remap_info[i]);
+   intel_ring_emit(req->ring, MI_LOAD_REGISTER_IMM(1));
+   intel_ring_emit_reg(req->ring, GEN7_L3LOG(slice, i));
+   intel_ring_emit(req->ring, remap_info[i]);
}
-   intel_ring_advance(req->ringbuf);
+   intel_ring_advance(req->ring);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index dece033cf604..5b4e77a80c19 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private 
*file_priv, u32 id)
 static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e7df91f9a51f..a0f5a997c2f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1148,7 +1148,7 @@ i915_gem_execbuffer_retire_commands(struct 
i915_execbuffer_params *params)
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret, i;
 
if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
@@ -1229,7 +1229,7 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   struct list_head *vmas)
 {
-   struct intel_ringbuffer *ring = params->request->ringbuf;
+   struct intel_ringbuffer *ring = params->request->ring;
struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cb7cb59d4c4a..38c109cda904 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -656,7 +656,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
  unsigned entry,
  dma_addr_t addr)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
BUG_ON(entry >= 4);
@@ -1648,7 +1648,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
@@ -1686,7 +1686,7 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
  struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
diff --git a/drivers/gpu/drm/i915/i915_gem_req

[Intel-gfx] [PATCH 040/190] drm/i915: Record the ringbuffer associated with the request

2016-01-11 Thread Chris Wilson
The request tells us where to read the ringbuf from, so use that
information to simplify the error capture. If no request was active at
the time of the hang, the ring is idle and there is no information
inside the ring pertaining to the hang.

Note carefully that this will reduce the amount of information stored in
the error state - any ring without an active request will not be
recorded.

Signed-off-by: Chris Wilson 
Reviewed-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3e137fc701cf..93da2c7581f6 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -995,7 +995,6 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
for (i = 0; i < I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = &dev_priv->ring[i];
-   struct intel_ringbuffer *rbuf;
 
error->ring[i].pid = -1;
 
@@ -1009,6 +1008,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
request = i915_gem_find_active_request(ring);
if (request) {
struct i915_address_space *vm;
+   struct intel_ringbuffer *rb;
 
vm = request->ctx && request->ctx->ppgtt ?
&request->ctx->ppgtt->base :
@@ -1039,26 +1039,14 @@ static void i915_gem_record_rings(struct drm_device 
*dev,
}
rcu_read_unlock();
}
-   }
 
-   if (i915.enable_execlists) {
-   /* TODO: This is only a small fix to keep basic error
-* capture working, but we need to add more information
-* for it to be useful (e.g. dump the context being
-* executed).
-*/
-   if (request)
-   rbuf = request->ctx->engine[ring->id].ringbuf;
-   else
-   rbuf = 
ring->default_context->engine[ring->id].ringbuf;
-   } else
-   rbuf = ring->buffer;
-
-   error->ring[i].cpu_ring_head = rbuf->head;
-   error->ring[i].cpu_ring_tail = rbuf->tail;
-
-   error->ring[i].ringbuffer =
-   i915_error_ggtt_object_create(dev_priv, rbuf->obj);
+   rb = request->ringbuf;
+   error->ring[i].cpu_ring_head = rb->head;
+   error->ring[i].cpu_ring_tail = rb->tail;
+   error->ring[i].ringbuffer =
+   i915_error_ggtt_object_create(dev_priv,
+ rb->obj);
+   }
 
error->ring[i].hws_page =
i915_error_ggtt_object_create(dev_priv, 
ring->status_page.obj);
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 043/190] drm/i915: Skip capturing an error state if we already have one

2016-01-11 Thread Chris Wilson
As we only ever keep the first error state around, we can avoid some
work that can be quite intrusive if we don't record the error the second
time around. This does move the race whereby the user could discard one
error state as the second is being captured, but that race exists in the
current code and we hope that recapturing error state is only done for
debugging.

Note that as we discard the error state for simulated errors, igt tests
that exercise error capture continue to function.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 4f17d6847569..86f582115313 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1312,6 +1312,9 @@ void i915_capture_error_state(struct drm_device *dev, 
bool wedged,
struct drm_i915_error_state *error;
unsigned long flags;
 
+   if (READ_ONCE(dev_priv->gpu_error.first_error))
+   return;
+
/* Account for pipe specific data like PIPE*STAT */
error = kzalloc(sizeof(*error), GFP_ATOMIC);
if (!error) {
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 058/190] drm/i915: Rename request->ring to request->engine

2016-01-11 Thread Chris Wilson
In order to disambiguate between the pointer to the intel_engine_cs
(called ring) and the intel_ringbuffer (called ringbuf), rename
s/ring/engine/.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  11 +--
 drivers/gpu/drm/i915/i915_drv.h  |   2 +-
 drivers/gpu/drm/i915/i915_gem.c  |  32 +++
 drivers/gpu/drm/i915/i915_gem_context.c  |  70 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   8 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  47 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  18 ++--
 drivers/gpu/drm/i915/i915_gem_request.c  |  53 ---
 drivers/gpu/drm/i915/i915_gem_request.h  |  10 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|   3 +-
 drivers/gpu/drm/i915/i915_guc_submission.c   |   8 +-
 drivers/gpu/drm/i915/i915_trace.h|  32 +++
 drivers/gpu/drm/i915/intel_breadcrumbs.c |   2 +-
 drivers/gpu/drm/i915/intel_display.c |  10 +-
 drivers/gpu/drm/i915/intel_lrc.c | 134 +--
 drivers/gpu/drm/i915/intel_mocs.c|  13 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  62 ++---
 17 files changed, 240 insertions(+), 275 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 387ae77d3c29..018076c89247 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -185,8 +185,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, " (%s mappable)", s);
}
if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)",
-  
i915_gem_request_get_ring(obj->last_write_req)->name);
+   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -593,14 +592,14 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
void *data)
   pipe, plane);
}
if (work->flip_queued_req) {
-   struct intel_engine_cs *ring =
-   
i915_gem_request_get_ring(work->flip_queued_req);
+   struct intel_engine_cs *engine =
+   work->flip_queued_req->engine;
 
seq_printf(m, "Flip queued on %s at seqno %x, 
next seqno %x [current breadcrumb %x], completed? %d\n",
-  ring->name,
+  engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  intel_ring_get_seqno(ring),
+  intel_ring_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 58e9e5e50769..baede4517c70 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3410,7 +3410,7 @@ wait_remaining_ms_from_jiffies(unsigned long 
timestamp_jiffies, int to_wait_ms)
 }
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request 
*req)
 {
-   struct intel_engine_cs *engine = req->ring;
+   struct intel_engine_cs *engine = req->engine;
 
/* Before we do the heavier coherent read of the seqno,
 * check the value (hopefully) in the CPU cacheline.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 247731672cb1..6622c9bb3af8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1122,7 +1122,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
if (ret)
return ret;
 
-   i = obj->last_write_req->ring->id;
+   i = obj->last_write_req->engine->id;
if (obj->last_read_req[i] == obj->last_write_req)
i915_gem_object_retire__read(obj, i);
else
@@ -1149,7 +1149,7 @@ static void
 i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
   struct drm_i915_gem_request *req)
 {
-   int ring = req->ring->id;
+   int ring = req->engine->id;
 
if (obj->last_read_req[ring] == req)
i915_gem_object_retire__read(obj, ring);
@@ -2062,17 +2062,15 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request 

[Intel-gfx] [PATCH 055/190] drm/i915: Unify intel_logical_ring_emit and intel_ring_emit

2016-01-11 Thread Chris Wilson
Both perform the same actions with more or less indirection, so just
unify the code.
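
Once both paths pass the intel_ringbuffer directly, emitting a dword is
plausibly just a store at the current tail; a hypothetical sketch of the
unified helper (the intel_ringbuffer.h hunk is not shown in full, so
this body is an assumption):

static inline void intel_ring_emit(struct intel_ringbuffer *ring,
                                   u32 data)
{
    /* assumed layout: CPU mapping of the ring plus a byte tail */
    *(u32 *)(ring->virtual_start + ring->tail) = data;
    ring->tail += 4;
}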

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c|   8 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  34 -
 drivers/gpu/drm/i915/i915_gem_gtt.c|  26 +++
 drivers/gpu/drm/i915/intel_display.c   |  26 +++
 drivers/gpu/drm/i915/intel_lrc.c   | 114 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |  26 ---
 drivers/gpu/drm/i915/intel_mocs.c  |  30 
 drivers/gpu/drm/i915/intel_overlay.c   |  42 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 101 -
 drivers/gpu/drm/i915/intel_ringbuffer.h|  21 ++
 11 files changed, 194 insertions(+), 236 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c2a1ec8abc11..247731672cb1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4068,7 +4068,7 @@ err:
 
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
-   struct intel_engine_cs *ring = req->ring;
+   struct intel_ringbuffer *ring = req->ringbuf;
struct drm_i915_private *dev_priv = req->i915;
u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
int i, ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 3e3b4bf3fed1..d58de7e084dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -519,7 +519,7 @@ i915_gem_context_get(struct drm_i915_file_private 
*file_priv, u32 id)
 static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
-   struct intel_engine_cs *ring = req->ring;
+   struct intel_ringbuffer *ring = req->ringbuf;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
@@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * itlb_before_ctx_switch.
 */
if (IS_GEN6(req->i915)) {
-   ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
+   ret = req->ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
@@ -562,7 +562,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_ring(signaller, req->i915, i) {
-   if (signaller == ring)
+   if (signaller == req->ring)
continue;
 
intel_ring_emit_reg(ring, 
RING_PSMI_CTL(signaller->mmio_base));
@@ -587,7 +587,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_ring(signaller, req->i915, i) {
-   if (signaller == ring)
+   if (signaller == req->ring)
continue;
 
intel_ring_emit_reg(ring, 
RING_PSMI_CTL(signaller->mmio_base));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 78b462956c78..603a247ac333 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1146,14 +1146,12 @@ i915_gem_execbuffer_retire_commands(struct 
i915_execbuffer_params *params)
 }
 
 static int
-i915_reset_gen7_sol_offsets(struct drm_device *dev,
-   struct drm_i915_gem_request *req)
+i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-   struct intel_engine_cs *ring = req->ring;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_ringbuffer *ring = req->ringbuf;
int ret, i;
 
-   if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
+   if (!IS_GEN7(req->i915) || req->ring->id != RCS) {
DRM_DEBUG("sol reset is gen7/rcs only\n");
return -EINVAL;
}
@@ -1231,9 +1229,8 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
   struct drm_i915_gem_execbuffer2 *args,
   struct list_head *vmas)
 {
-   struct drm_device *dev = params->dev;
-   struct intel_engine_cs *ring = params->ring;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct intel_ringbuffer *ring = params->request->ringbuf;
+   struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
u32 instp_mask;
@@ -1247,34 +1244,31 @@ i915_gem_ringb

[Intel-gfx] [PATCH 037/190] drm/i915: Add background commentary to "waitboosting"

2016-01-11 Thread Chris Wilson
Describe the intent of boosting the GPU frequency to maximum before
waiting on the GPU.

RPS waitboosting was introduced with

commit b29c19b645287f7062e17d70fa4e9781a01a5d88
Author: Chris Wilson 
Date:   Wed Sep 25 17:34:56 2013 +0100

drm/i915: Boost RPS frequency for CPU stalls

but lacked a concise comment in the code to explain itself.

Signed-off-by: Chris Wilson 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3fea582768e9..3948e85eaa48 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1244,6 +1244,22 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
}
 
trace_i915_gem_request_wait_begin(req);
+
+   /* This client is about to stall waiting for the GPU. In many cases
+* this is undesirable and limits the throughput of the system, as
+* many clients cannot continue processing user input/output whilst
+* blocked. RPS autotuning may take tens of milliseconds to respond
+* to the GPU load and thus incurs additional latency for the client.
+* We can circumvent that by promoting the GPU frequency to maximum
+* before we wait. This makes the GPU throttle up much more quickly
+* (good for benchmarks and user experience, e.g. window animations),
+* but at a cost of spending more power processing the workload
+* (bad for battery). Not all clients even want their results
+* immediately and for them we should just let the GPU select its own
+* frequency to maximise efficiency. To prevent a single client from
+* forcing the clocks too high for the whole system, we only allow
+* each client to waitboost once in a busy period.
+*/
if (INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 047/190] drm/i915: Rename request reference/unreference to get/put

2016-01-11 Thread Chris Wilson
Now that we derive requests from struct fence, swap over to its
nomenclature for references. It's shorter and more idiomatic across the
kernel.

s/i915_gem_request_reference/i915_gem_request_get/
s/i915_gem_request_unreference/i915_gem_request_put/

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  | 14 +++---
 drivers/gpu/drm/i915/i915_gem_request.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 ++--
 drivers/gpu/drm/i915/intel_display.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c |  4 ++--
 drivers/gpu/drm/i915/intel_pm.c  |  5 ++---
 7 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6d8d65304abf..fd61e722b595 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1185,7 +1185,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (req == NULL)
return 0;
 
-   requests[n++] = i915_gem_request_reference(req);
+   requests[n++] = i915_gem_request_get(req);
} else {
for (i = 0; i < I915_NUM_RINGS; i++) {
struct drm_i915_gem_request *req;
@@ -1194,7 +1194,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (req == NULL)
continue;
 
-   requests[n++] = i915_gem_request_reference(req);
+   requests[n++] = i915_gem_request_get(req);
}
}
 
@@ -1207,7 +1207,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
for (i = 0; i < n; i++) {
if (ret == 0)
i915_gem_object_retire_request(obj, requests[i]);
-   i915_gem_request_unreference(requests[i]);
+   i915_gem_request_put(requests[i]);
}
 
return ret;
@@ -2492,7 +2492,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
if (obj->last_read_req[i] == NULL)
continue;
 
-   req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
+   req[n++] = i915_gem_request_get(obj->last_read_req[i]);
}
 
mutex_unlock(&dev->struct_mutex);
@@ -2502,7 +2502,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
ret = __i915_wait_request(req[i], true,
  args->timeout_ns > 0 ? 
&args->timeout_ns : NULL,
  to_rps_client(file));
-   i915_gem_request_unreference(req[i]);
+   i915_gem_request_put(req[i]);
}
return ret;
 
@@ -3498,14 +3498,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct 
drm_file *file)
target = request;
}
if (target)
-   i915_gem_request_reference(target);
+   i915_gem_request_get(target);
spin_unlock(&file_priv->mm.lock);
 
if (target == NULL)
return 0;
 
ret = __i915_wait_request(target, true, NULL, NULL);
-   i915_gem_request_unreference(target);
+   i915_gem_request_put(target);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index e366ca0dcd99..a796dbd1b0e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -326,7 +326,7 @@ static void __i915_gem_request_release(struct 
drm_i915_gem_request *request)
i915_gem_request_remove_from_client(request);
 
i915_gem_context_unreference(request->ctx);
-   i915_gem_request_unreference(request);
+   i915_gem_request_put(request);
 }
 
 void i915_gem_request_cancel(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index b55d0b7c7f2a..0ab14fd0fce0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -147,13 +147,13 @@ to_request(struct fence *fence)
 }
 
 static inline struct drm_i915_gem_request *
-i915_gem_request_reference(struct drm_i915_gem_request *req)
+i915_gem_request_get(struct drm_i915_gem_request *req)
 {
return to_request(fence_get(&req->fence));
 }
 
 static inline void
-i915_gem_request_unreference(struct drm_i915_gem_request *req)
+i915_gem_request_put(struct drm_i915_gem_request *req)
 {
fence_put(&req->fence);
 }
@@ -162,10 +162,10 @@ static inline void i915_gem_request_assign(struct 
drm_i915_gem_request **pdst,
   struct drm_i915_gem_request *src)
 {
if (src)
-   i915_gem_request_reference(src);
+   i91

[Intel-gfx] [PATCH 008/190] drm/i915: Simplify checking of GPU reset_counter in display pageflips

2016-01-11 Thread Chris Wilson
If, when we store the reset_counter for the operation, we ensure that
the GPU is not wedged or in the middle of a reset, we can then assert that
if any reset occurs the reset_counter must change. Later we can just
compare the operation's reset epoch against the current counter to see
if we need to abort the operation (to handle the hang).
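
An illustrative fragment of the epoch pattern (drawn from the hunks
below): sample the counter once up front, failing early if a reset is
already pending, and detect any later reset by simple inequality:

    intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
    if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter))
        return -EIO; /* don't start work in a doomed epoch */

    /* later, e.g. in page_flip_finished(): */
    if (intel_crtc->reset_counter !=
        i915_reset_counter(&dev_priv->gpu_error))
        return true; /* a reset occurred; treat the flip as done */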

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_display.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 0933bdbaa935..183c05bdb220 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3288,14 +3288,12 @@ void intel_finish_reset(struct drm_device *dev)
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
struct drm_device *dev = crtc->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
unsigned reset_counter;
bool pending;
 
-   reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-   if (intel_crtc->reset_counter != reset_counter ||
-   __i915_reset_in_progress_or_wedged(reset_counter))
+   reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error);
+   if (intel_crtc->reset_counter != reset_counter)
return false;
 
spin_lock_irq(&dev->event_lock);
@@ -11011,8 +11009,7 @@ static bool page_flip_finished(struct intel_crtc *crtc)
unsigned reset_counter;
 
reset_counter = i915_reset_counter(&dev_priv->gpu_error);
-   if (crtc->reset_counter != reset_counter ||
-   __i915_reset_in_progress_or_wedged(reset_counter))
+   if (crtc->reset_counter != reset_counter)
return true;
 
/*
@@ -11668,8 +11665,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
if (ret)
goto cleanup;
 
-   atomic_inc(&intel_crtc->unpin_work_count);
intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+   if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) {
+   ret = -EIO;
+   goto cleanup;
+   }
+
+   atomic_inc(&intel_crtc->unpin_work_count);
 
if (INTEL_INFO(dev)->gen >= 5 || IS_G4X(dev))
work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1;
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 021/190] drm/i915: Use HWS for seqno tracking everywhere

2016-01-11 Thread Chris Wilson
By using the same address for storing the HWS on every platform, we can
remove the platform specific vfuncs and reduce the get-seqno routine to
a single read of a cached memory location.
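
The common accessor then plausibly reduces to one cached load from the
status page (the header hunk is not included in this mail, so the body
below is an assumption):

static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
{
    /* assumed: the seqno always lands at the same dword of the
     * hardware status page on every platform
     */
    return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}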

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 10 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  4 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|  2 +-
 drivers/gpu/drm/i915/i915_irq.c  |  4 +-
 drivers/gpu/drm/i915/i915_trace.h|  2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c |  4 +-
 drivers/gpu/drm/i915/intel_lrc.c | 46 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 86 
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  7 +--
 9 files changed, 43 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index d09e48455dcb..5a706c700684 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   ring->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  ring->get_seqno(ring),
+  intel_ring_get_seqno(ring),
   
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
@@ -732,10 +732,8 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
struct rb_node *rb;
 
-   if (ring->get_seqno) {
-   seq_printf(m, "Current sequence (%s): %x\n",
-  ring->name, ring->get_seqno(ring));
-   }
+   seq_printf(m, "Current sequence (%s): %x\n",
+  ring->name, intel_ring_get_seqno(ring));
 
spin_lock(&ring->breadcrumbs.lock);
for (rb = rb_first(&ring->breadcrumbs.waiters);
@@ -1355,7 +1353,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
 
for_each_ring(ring, dev_priv, i) {
acthd[i] = intel_ring_get_active_head(ring);
-   seqno[i] = ring->get_seqno(ring);
+   seqno[i] = intel_ring_get_seqno(ring);
}
 
i915_get_extra_instdone(dev, instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44d46018ee13..fcedcbc50834 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2971,13 +2971,13 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req)
 {
-   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+   return i915_seqno_passed(intel_ring_get_seqno(req->ring),
 req->previous_seqno);
 }
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
-   return i915_seqno_passed(req->ring->get_seqno(req->ring),
+   return i915_seqno_passed(intel_ring_get_seqno(req->ring),
 req->seqno);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 01d0206ca4dd..3e137fc701cf 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -903,7 +903,7 @@ static void i915_record_ring_state(struct drm_device *dev,
ering->waiting = intel_engine_has_waiter(ring);
ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
ering->acthd = intel_ring_get_active_head(ring);
-   ering->seqno = ring->get_seqno(ring);
+   ering->seqno = intel_ring_get_seqno(ring);
ering->start = I915_READ_START(ring);
ering->head = I915_READ_HEAD(ring);
ering->tail = I915_READ_TAIL(ring);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d73669783045..627c7fb6aa9b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2903,7 +2903,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
return -1;
 
-   if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
+   if (i915_seqno_passed(intel_ring_get_seqno(signaller), seqno))
return 1;
 
/* cursory check for an unkickable deadlock */
@@ -3068,7 +3068,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
semaphore_clear_deadlocks(dev_priv);
 
acthd = intel_ring_get_active_head(ring);
-   seqno = ring->get_seqno(ring);
+   seqno = intel_ring_get_seqno(ring);
 
if (ring->hangcheck.seqno == seqno) {
if (ring_idle(ring, seqno)) {
d

[Intel-gfx] [PATCH 051/190] drm,i915: Introduce drm_malloc_gfp()

2016-01-11 Thread Chris Wilson
I have instances where I want to use drm_malloc_ab() but with a custom
gfp mask. And with those, where I want a temporary allocation, I want to
try a high-order kmalloc() before using a vmalloc().

So refactor my usage into drm_malloc_gfp().
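
A hedged sketch of what the helper looks like (the drm_mem_util.h hunk
is truncated below, so this body is an approximation): overflow-check,
try kmalloc() with the caller's gfp mask, then fall back to vmalloc():

static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size,
                                       gfp_t gfp)
{
    void *ptr;

    if (size != 0 && nmemb > SIZE_MAX / size)
        return NULL;

    if (size * nmemb <= PAGE_SIZE)
        return kmalloc(nmemb * size, gfp);

    /* high-order attempt first, silently and without retrying */
    ptr = kmalloc(nmemb * size, gfp | __GFP_NOWARN | __GFP_NORETRY);
    if (ptr)
        return ptr;

    return __vmalloc(size * nmemb, gfp | __GFP_HIGHMEM, PAGE_KERNEL);
}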

Signed-off-by: Chris Wilson 
Cc: dri-de...@lists.freedesktop.org
Cc: Ville Syrjälä 
Reviewed-by: Ville Syrjälä 
Acked-by: Dave Airlie 
---
 drivers/gpu/drm/i915/i915_gem.c|  4 +---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++-
 drivers/gpu/drm/i915/i915_gem_gtt.c|  5 +++--
 drivers/gpu/drm/i915/i915_gem_userptr.c| 15 ---
 include/drm/drm_mem_util.h | 19 +++
 5 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2912e8714f5b..a4f9c5bbb883 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2040,9 +2040,7 @@ void *i915_gem_object_pin_vmap(struct drm_i915_gem_object 
*obj)
int n;
 
n = obj->base.size >> PAGE_SHIFT;
-   pages = kmalloc(n*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);
-   if (pages == NULL)
-   pages = drm_malloc_ab(n, sizeof(*pages));
+   pages = drm_malloc_gfp(n, sizeof(*pages), GFP_TEMPORARY);
if (pages != NULL) {
n = 0;
for_each_sg_page(obj->pages->sgl, &sg_iter, 
obj->pages->nents, 0)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index da1c6fe5b40e..dfabeee2ff0b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1766,11 +1766,9 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL;
}
 
-   exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
-GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
-   if (exec2_list == NULL)
-   exec2_list = drm_malloc_ab(sizeof(*exec2_list),
-  args->buffer_count);
+   exec2_list = drm_malloc_gfp(sizeof(*exec2_list),
+   args->buffer_count,
+   GFP_TEMPORARY);
if (exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
  args->buffer_count);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56f4f2e58d53..224fe89baca3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3376,8 +3376,9 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view 
*ggtt_view,
int ret = -ENOMEM;
 
/* Allocate a temporary list of source pages for random access. */
-   page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
-  sizeof(dma_addr_t));
+   page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+   sizeof(dma_addr_t),
+   GFP_TEMPORARY);
if (!page_addr_list)
return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 1a5f89dba4af..251e81c4b0ea 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -573,10 +573,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct 
*_work)
ret = -ENOMEM;
pinned = 0;
 
-   pvec = kmalloc(npages*sizeof(struct page *),
-  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
-   if (pvec == NULL)
-   pvec = drm_malloc_ab(npages, sizeof(struct page *));
+   pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY);
if (pvec != NULL) {
struct mm_struct *mm = obj->userptr.mm->mm;
 
@@ -713,14 +710,10 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object 
*obj)
pvec = NULL;
pinned = 0;
if (obj->userptr.mm->mm == current->mm) {
-   pvec = kmalloc(num_pages*sizeof(struct page *),
-  GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+   pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), 
GFP_TEMPORARY);
if (pvec == NULL) {
-   pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
-   if (pvec == NULL) {
-   __i915_gem_userptr_set_active(obj, false);
-   return -ENOMEM;
-   }
+   __i915_gem_userptr_set_active(obj, false);
+   return -ENOMEM;
}
 
pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_u

[Intel-gfx] [PATCH 054/190] drm/i915: Use the new rq->i915 field where appropriate

2016-01-11 Thread Chris Wilson
In a few frequent cases, having a direct pointer to the drm_i915_private
from the request is very useful.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c|  7 +++---
 drivers/gpu/drm/i915/i915_gem_context.c| 21 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +--
 drivers/gpu/drm/i915/i915_gem_request.c|  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   |  6 ++
 drivers/gpu/drm/i915/intel_pm.c|  3 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 34 --
 7 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 31926a4fb42a..c2a1ec8abc11 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2568,7 +2568,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
return 0;
 
if (!i915.semaphores) {
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   struct drm_i915_private *i915 = from_req->i915;
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
  NULL,
@@ -4069,12 +4069,11 @@ err:
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 {
struct intel_engine_cs *ring = req->ring;
-   struct drm_device *dev = ring->dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_private *dev_priv = req->i915;
u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
int i, ret;
 
-   if (!HAS_L3_DPF(dev) || !remap_info)
+   if (!HAS_L3_DPF(dev_priv) || !remap_info)
return 0;
 
ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 361be1085a18..3e3b4bf3fed1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -524,7 +524,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
-   hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+   hweight32(INTEL_INFO(req->i915)->ring_mask) - 1 :
0;
int len, i, ret;
 
@@ -533,21 +533,21 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * explicitly, so we rely on the value at ring init, stored in
 * itlb_before_ctx_switch.
 */
-   if (IS_GEN6(ring->dev)) {
+   if (IS_GEN6(req->i915)) {
ret = ring->flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
 
/* These flags are for resource streamer on HSW+ */
-   if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8)
+   if (IS_HASWELL(req->i915) || INTEL_INFO(req->i915)->gen >= 8)
flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
-   else if (INTEL_INFO(ring->dev)->gen < 8)
+   else if (INTEL_INFO(req->i915)->gen < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
len = 4;
-   if (INTEL_INFO(ring->dev)->gen >= 7)
+   if (INTEL_INFO(req->i915)->gen >= 7)
len += 2 + (num_rings ? 4*num_rings + 2 : 0);
 
ret = intel_ring_begin(req, len);
@@ -555,13 +555,13 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
return ret;
 
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-   if (INTEL_INFO(ring->dev)->gen >= 7) {
+   if (INTEL_INFO(req->i915)->gen >= 7) {
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
if (num_rings) {
struct intel_engine_cs *signaller;
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-   for_each_ring(signaller, to_i915(ring->dev), i) {
+   for_each_ring(signaller, req->i915, i) {
if (signaller == ring)
continue;
 
@@ -581,12 +581,12 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 */
intel_ring_emit(ring, MI_NOOP);
 
-   if (INTEL_INFO(ring->dev)->gen >= 7) {
+   if (INTEL_INFO(req->i915)->gen >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
 
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
-   for_each_ring(signaller, to_i915(ring->dev), i) {
+   for_each_ring(signaller, req->i915, i) {
if (signaller == ring)
continue;
 
@@ -827,10 +827,9 @@ unpin_

[Intel-gfx] [PATCH 027/190] drm/i915: Only query timestamp when measuring elapsed time

2016-01-11 Thread Chris Wilson
Avoid the two calls to ktime_get_raw_ns() (at best it reads the TSC) as
we only need to compute the elapsed time for a timed wait.
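
The trick is to convert the caller's relative timeout into an absolute
deadline up front, so the clock is read at most twice and only for
timed waits; the shape of it (taken from the hunks below):

    if (timeout) {
        timeout_remain = nsecs_to_jiffies_timeout(*timeout);
        *timeout += ktime_get_raw_ns(); /* now an absolute deadline */
    }

    /* ... and on completion, convert back to time remaining: */
    if (timeout) {
        *timeout -= ktime_get_raw_ns();
        if (*timeout < 0)
            *timeout = 0;
    }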

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a0744626a110..b956b8813307 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1220,7 +1220,6 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
struct intel_wait wait;
unsigned long timeout_remain;
-   s64 before, now;
int ret = 0;
 
might_sleep();
@@ -1239,13 +1238,12 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if (*timeout == 0)
return -ETIME;
 
+   /* Record current time in case interrupted, or wedged */
timeout_remain = nsecs_to_jiffies_timeout(*timeout);
+   *timeout += ktime_get_raw_ns();
}
 
-   /* Record current time in case interrupted by signal, or wedged */
trace_i915_gem_request_wait_begin(req);
-   before = ktime_get_raw_ns();
-
if (INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
@@ -1298,13 +1296,12 @@ wakeup:
 complete:
intel_engine_remove_wait(req->ring, &wait);
__set_task_state(wait.task, TASK_RUNNING);
-   now = ktime_get_raw_ns();
trace_i915_gem_request_wait_end(req);
 
if (timeout) {
-   s64 tres = *timeout - (now - before);
-
-   *timeout = tres < 0 ? 0 : tres;
+   *timeout -= ktime_get_raw_ns();
+   if (*timeout < 0)
+   *timeout = 0;
 
/*
 * Apparently ktime isn't accurate enough and occasionally has a
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 073/190] drm/i915: Introduce i915_gem_active for request tracking

2016-01-11 Thread Chris Wilson
In the next patch, request tracking is made more generic, and for that
we need a new, expanded struct. To separate the logic changes from the
mechanical churn, we split out the structure renaming into this patch.

v2: Writer's block. Add some spiel about why we track requests.
v3: Now i915_gem_active.
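
As a rough sketch (assuming only the request back-pointer, which is all
the hunks below exercise), the rename wraps the bare pointer in a struct
so that later patches can grow it:

/* Sketch: the expanded tracking struct; any fields beyond .request
 * are introduced by later patches, not here. */
struct i915_gem_active {
	struct drm_i915_gem_request *request;
};

so obj->last_write_req becomes obj->last_write.request, and
obj->last_read_req[i] becomes obj->last_read[i].request.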

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 10 +++---
 drivers/gpu/drm/i915/i915_drv.h|  9 +++--
 drivers/gpu/drm/i915/i915_gem.c| 56 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
 drivers/gpu/drm/i915/i915_gem_fence.c  |  6 ++--
 drivers/gpu/drm/i915/i915_gem_request.h| 38 
 drivers/gpu/drm/i915/i915_gem_tiling.c |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  6 ++--
 drivers/gpu/drm/i915/intel_display.c   | 10 +++---
 9 files changed, 89 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8de944ed3369..65cb1d6a5d64 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
   obj->base.write_domain);
for_each_ring(ring, dev_priv, i)
 		seq_printf(m, "%x ",
-			   i915_gem_request_get_seqno(obj->last_read_req[i]));
+			   i915_gem_request_get_seqno(obj->last_read[i].request));
seq_printf(m, "] %x %x%s%s%s",
-  i915_gem_request_get_seqno(obj->last_write_req),
-  i915_gem_request_get_seqno(obj->last_fenced_req),
+  i915_gem_request_get_seqno(obj->last_write.request),
+  i915_gem_request_get_seqno(obj->last_fence.request),
 		   i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
-   if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
+   if (obj->last_write.request != NULL)
+   seq_printf(m, " (%s)", obj->last_write.request->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cae448e238ca..c577f86d94f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2110,11 +2110,10 @@ struct drm_i915_gem_object {
 * requests on one ring where the write request is older than the
 * read request. This allows for the CPU to read from an active
 * buffer by only waiting for the write to complete.
-* */
-   struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
-   struct drm_i915_gem_request *last_write_req;
-   /** Breadcrumb of last fenced GPU access to the buffer. */
-   struct drm_i915_gem_request *last_fenced_req;
+*/
+   struct i915_gem_active last_read[I915_NUM_RINGS];
+   struct i915_gem_active last_write;
+   struct i915_gem_active last_fence;
 
/** Current tiling stride for the object, if it's tiled. */
uint32_t stride;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b0230e7151ce..77c253ddf060 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
return 0;
 
if (readonly) {
-   if (obj->last_write_req != NULL) {
-   ret = i915_wait_request(obj->last_write_req);
+   if (obj->last_write.request != NULL) {
+   ret = i915_wait_request(obj->last_write.request);
if (ret)
return ret;
 
-   i = obj->last_write_req->engine->id;
-   if (obj->last_read_req[i] == obj->last_write_req)
+   i = obj->last_write.request->engine->id;
+   if (obj->last_read[i].request == obj->last_write.request)
i915_gem_object_retire__read(obj, i);
else
i915_gem_object_retire__write(obj);
}
} else {
for (i = 0; i < I915_NUM_RINGS; i++) {
-   if (obj->last_read_req[i] == NULL)
+   if (obj->last_read[i].request == NULL)
continue;
 
-   ret = i915_wait_request(obj->last_read_req[i]);
+ 

[Intel-gfx] [PATCH 056/190] drm/i915: Unify intel_ring_begin()

2016-01-11 Thread Chris Wilson
Combine the near-identical implementations of intel_logical_ring_begin()
and intel_ring_begin() - the only difference is that the logical wait
has to check for a matching ring (which the legacy path can assume).
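
To illustrate, a sketch of the unified wait-for-space loop (adapted from
the execlists version removed below; variable names are approximate):

/* Sketch: wait for a request whose completion frees enough space.
 * The ringbuffer test is a no-op for legacy engines, which own a
 * single ringbuffer, but skips other contexts' requests on the
 * shared execlists queue. */
list_for_each_entry(target, &engine->request_list, list) {
	if (target->ringbuf != req->ringbuf)
		continue;

	space = __intel_ring_space(target->postfix, ringbuf->tail,
				   ringbuf->size);
	if (space >= bytes)
		break;
}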

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c| 141 ++--
 drivers/gpu/drm/i915/intel_lrc.h|   1 -
 drivers/gpu/drm/i915/intel_mocs.c   |  12 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 111 +
 4 files changed, 69 insertions(+), 196 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index dc4fc9d8612c..3d14b69632e8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -698,48 +698,6 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
return 0;
 }
 
-static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
-  int bytes)
-{
-   struct intel_ringbuffer *ringbuf = req->ringbuf;
-   struct intel_engine_cs *ring = req->ring;
-   struct drm_i915_gem_request *target;
-   unsigned space;
-   int ret;
-
-   if (intel_ring_space(ringbuf) >= bytes)
-   return 0;
-
-   /* The whole point of reserving space is to not wait! */
-   WARN_ON(ringbuf->reserved_in_use);
-
-   list_for_each_entry(target, &ring->request_list, list) {
-   /*
-* The request queue is per-engine, so can contain requests
-* from multiple ringbuffers. Here, we must ignore any that
-* aren't from the ringbuffer we're considering.
-*/
-   if (target->ringbuf != ringbuf)
-   continue;
-
-   /* Would completion of this request free enough space? */
-   space = __intel_ring_space(target->postfix, ringbuf->tail,
-  ringbuf->size);
-   if (space >= bytes)
-   break;
-   }
-
-   if (WARN_ON(&target->list == &ring->request_list))
-   return -ENOSPC;
-
-   ret = i915_wait_request(target);
-   if (ret)
-   return ret;
-
-   ringbuf->space = space;
-   return 0;
-}
-
 /*
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
  * @request: Request to advance the logical ringbuffer of.
@@ -763,89 +721,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
execlists_context_queue(request);
 }
 
-static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
-{
-   int rem = ringbuf->size - ringbuf->tail;
-   memset(ringbuf->virtual_start + ringbuf->tail, 0, rem);
-
-   ringbuf->tail = 0;
-   intel_ring_update_space(ringbuf);
-}
-
-static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
-{
-   struct intel_ringbuffer *ringbuf = req->ringbuf;
-   int remain_usable = ringbuf->effective_size - ringbuf->tail;
-   int remain_actual = ringbuf->size - ringbuf->tail;
-   int ret, total_bytes, wait_bytes = 0;
-   bool need_wrap = false;
-
-   if (ringbuf->reserved_in_use)
-   total_bytes = bytes;
-   else
-   total_bytes = bytes + ringbuf->reserved_size;
-
-   if (unlikely(bytes > remain_usable)) {
-   /*
-* Not enough space for the basic request. So need to flush
-* out the remainder and then wait for base + reserved.
-*/
-   wait_bytes = remain_actual + total_bytes;
-   need_wrap = true;
-   } else {
-   if (unlikely(total_bytes > remain_usable)) {
-   /*
-* The base request will fit but the reserved space
-* falls off the end. So only need to to wait for the
-* reserved size after flushing out the remainder.
-*/
-   wait_bytes = remain_actual + ringbuf->reserved_size;
-   need_wrap = true;
-   } else if (total_bytes > ringbuf->space) {
-   /* No wrapping required, just waiting. */
-   wait_bytes = total_bytes;
-   }
-   }
-
-   if (wait_bytes) {
-   ret = logical_ring_wait_for_space(req, wait_bytes);
-   if (unlikely(ret))
-   return ret;
-
-   if (need_wrap)
-   __wrap_ring_buffer(ringbuf);
-   }
-
-   return 0;
-}
-
-/**
- * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
- *
- * @req: The request to start some new work for
- * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
- *
- * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
- * be wrapped

[Intel-gfx] [PATCH 067/190] drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START

2016-01-11 Thread Chris Wilson
Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly
the same thing: they add MI_BATCHBUFFER_START to the request's
ringbuffer. We need only one vfunc.
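
A minimal sketch of the consolidation (signature taken from the hunks
below; the struct layout is abbreviated):

/* Sketch: one vfunc serves both submission back ends. */
struct intel_engine_cs {
	/* ... */
	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 len,
			     unsigned dispatch_flags);
};

/* Callers no longer care which back end is active: */
ret = req->engine->emit_bb_start(req, exec_start, exec_len,
				 params->dispatch_flags);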

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 +--
 drivers/gpu/drm/i915/i915_gem_render_state.c | 16 +++
 drivers/gpu/drm/i915/intel_lrc.c |  9 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 67 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 12 +++--
 5 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3956d74d8c8c..3e6384deca65 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1297,9 +1297,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
 params->args_batch_start_offset;
 
-   ret = params->ring->dispatch_execbuffer(params->request,
-   exec_start, exec_len,
-   params->dispatch_flags);
+   ret = params->ring->emit_bb_start(params->request,
+ exec_start, exec_len,
+ params->dispatch_flags);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index bee3f0ccd0cd..ccc988c2b226 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -205,18 +205,18 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
if (so.rodata == NULL)
return 0;
 
-   ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
-  so.rodata->batch_items * 4,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
 
if (so.aux_batch_size > 8) {
-   ret = req->engine->dispatch_execbuffer(req,
-  (so.ggtt_offset +
-   so.aux_batch_offset),
-  so.aux_batch_size,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req,
+(so.ggtt_offset +
+ so.aux_batch_offset),
+so.aux_batch_size,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 82b21a883732..30effca91184 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -783,7 +783,9 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
 args->batch_start_offset;
 
-   ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags);
+   ret = engine->emit_bb_start(params->request,
+   exec_start, args->batch_len,
+   params->dispatch_flags);
if (ret)
return ret;
 
@@ -1409,7 +1411,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
 }
 
 static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
- u64 offset, unsigned dispatch_flags)
+ u64 offset, u32 len,
+ unsigned dispatch_flags)
 {
struct intel_ring *ring = req->ring;
bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
@@ -1637,12 +1640,14 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
return 0;
 
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
 
ret = req->engine->emit_bb_start(req,
 (so.ggtt_offset + so.aux_batch_offset),
+so.aux_batch_size,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/i

[Intel-gfx] [PATCH 038/190] drm/i915: Flush the RPS bottom-half when the GPU idles

2016-01-11 Thread Chris Wilson
Make sure that the RPS bottom-half is flushed before we set the idle
frequency when we decide the GPU is idle. This should prevent any races
between the bottom-half and setting the idle frequency, and ensures that
the bottom-half is bounded by the GPU's rpm reference taken while it
is active (i.e. between gen6_rps_busy() and gen6_rps_idle()).

v2: Avoid recursively using the i915->wq - RPS does not touch the
struct_mutex so has no place being on the ordered i915->wq.
v3: Enable/disable interrupts for RPS busy/idle in order to prevent
further HW access from RPS outside of the wakeref.
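
The resulting ordering on the idle path, sketched from the hunks below:
stop generating interrupts, drain any handler still running, then flush
the worker once nothing can requeue it.

/* Sketch of gen6_disable_rps_interrupts() after this patch. */
spin_lock_irq(&dev_priv->irq_lock);
dev_priv->rps.interrupts_enabled = false;
/* ... mask GEN6_PMINTRMSK and the IER bits as below ... */
spin_unlock_irq(&dev_priv->irq_lock);

synchronize_irq(dev_priv->dev->irq);   /* no handler still running */
cancel_work_sync(&dev_priv->rps.work); /* and none can requeue it */
gen6_reset_rps_interrupts(dev_priv);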

Signed-off-by: Chris Wilson 
Cc: Imre Deak 
Cc: Jesse Barnes 
---
 drivers/gpu/drm/i915/i915_drv.c  |  1 -
 drivers/gpu/drm/i915/i915_irq.c  | 45 +++-
 drivers/gpu/drm/i915/intel_display.c |  1 +
 drivers/gpu/drm/i915/intel_drv.h |  6 ++---
 drivers/gpu/drm/i915/intel_pm.c  | 23 +-
 5 files changed, 34 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4c090f1cf69c..442e1217e442 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1492,7 +1492,6 @@ static int intel_runtime_suspend(struct device *device)
 
intel_guc_suspend(dev);
 
-   intel_suspend_gt_powersave(dev);
intel_runtime_pm_disable_interrupts(dev_priv);
 
ret = intel_suspend_complete(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8866e981bcba..d9757d227c86 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -336,9 +336,8 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
__gen6_disable_pm_irq(dev_priv, mask);
 }
 
-void gen6_reset_rps_interrupts(struct drm_device *dev)
+void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
i915_reg_t reg = gen6_pm_iir(dev_priv);
 
spin_lock_irq(&dev_priv->irq_lock);
@@ -349,14 +348,14 @@ void gen6_reset_rps_interrupts(struct drm_device *dev)
spin_unlock_irq(&dev_priv->irq_lock);
 }
 
-void gen6_enable_rps_interrupts(struct drm_device *dev)
+void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   if (dev_priv->rps.interrupts_enabled)
+   return;
 
spin_lock_irq(&dev_priv->irq_lock);
-
-   WARN_ON(dev_priv->rps.pm_iir);
-   WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
+   WARN_ON_ONCE(dev_priv->rps.pm_iir);
+   WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
dev_priv->rps.interrupts_enabled = true;
I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) |
dev_priv->pm_rps_events);
@@ -382,17 +381,13 @@ u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
return mask;
 }
 
-void gen6_disable_rps_interrupts(struct drm_device *dev)
+void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   if (!dev_priv->rps.interrupts_enabled)
+   return;
 
spin_lock_irq(&dev_priv->irq_lock);
dev_priv->rps.interrupts_enabled = false;
-   spin_unlock_irq(&dev_priv->irq_lock);
-
-   cancel_work_sync(&dev_priv->rps.work);
-
-   spin_lock_irq(&dev_priv->irq_lock);
 
I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
@@ -401,8 +396,15 @@ void gen6_disable_rps_interrupts(struct drm_device *dev)
~dev_priv->pm_rps_events);
 
spin_unlock_irq(&dev_priv->irq_lock);
+   synchronize_irq(dev_priv->dev->irq);
 
-   synchronize_irq(dev->irq);
+   /* Now that we will not be generating any more work, flush any
+* outstanding tasks. As we are called on the RPS idle path,
+* we will reset the GPU to minimum frequencies, so the current
+* state of the worker can be discarded.
+*/
+   cancel_work_sync(&dev_priv->rps.work);
+   gen6_reset_rps_interrupts(dev_priv);
 }
 
 /**
@@ -1103,13 +1105,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
return;
}
 
-   /*
-* The RPS work is synced during runtime suspend, we don't require a
-* wakeref. TODO: instead of disabling the asserts make sure that we
-* always hold an RPM reference while the work is running.
-*/
-   DISABLE_RPM_WAKEREF_ASSERTS(dev_priv);
-
pm_iir = dev_priv->rps.pm_iir;
dev_priv->rps.pm_iir = 0;
/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
@@ -1122,7 +1117,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
 
if ((pm_iir & dev_pri

[Intel-gfx] [PATCH 029/190] drm/i915: Convert trace-irq to the breadcrumb waiter

2016-01-11 Thread Chris Wilson
If we convert the tracing from direct use of ring->irq_get() over to
the breadcrumb infrastructure, we only have a single user of
ring->irq_get() and so we will be able to simplify the driver routines
(eliminating the redundant validation and irq refcounting).

v2: Move to a signaling framework based upon the waiter.
v3: Track the first-signal to avoid having to walk the rbtree every time.
v4: Mark the signaler thread as RT priority to reduce latency in the
indirect wakeups.
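
For v4, the thread setup is outside the quoted hunks, but marking a
kthread as realtime amounts to something like this sketch (signaler_fn
and the thread name are assumptions):

/* Sketch: give the signaler kthread RT priority to cut latency. */
struct sched_param param = { .sched_priority = 1 };
struct task_struct *tsk;

tsk = kthread_run(signaler_fn, engine, "irq/i915");
if (!IS_ERR(tsk))
	sched_setscheduler_nocheck(tsk, SCHED_FIFO, &param);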

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |   8 --
 drivers/gpu/drm/i915/i915_gem.c  |   6 --
 drivers/gpu/drm/i915/i915_irq.c  |   7 +-
 drivers/gpu/drm/i915/i915_trace.h|   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 177 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   7 +-
 6 files changed, 186 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8940b8d3fa59..7f021505e32f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3620,14 +3620,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
schedule_timeout_uninterruptible(remaining_jiffies);
}
 }
-
-static inline void i915_trace_irq_get(struct intel_engine_cs *ring,
- struct drm_i915_gem_request *req)
-{
-   if (ring->trace_irq_req == NULL && ring->irq_get(ring))
-   i915_gem_request_assign(&ring->trace_irq_req, req);
-}
-
 static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->ring;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a713e8a6cb36..5ddb2ed0f785 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2889,12 +2889,6 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
i915_gem_object_retire__read(obj, ring->id);
}
 
-   if (unlikely(ring->trace_irq_req &&
-i915_gem_request_completed(ring->trace_irq_req))) {
-   ring->irq_put(ring);
-   i915_gem_request_assign(&ring->trace_irq_req, NULL);
-   }
-
WARN_ON(i915_verify_lists(ring->dev));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 738edd7fbf8d..bf48fa63127a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -996,12 +996,9 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 
 static void notify_ring(struct intel_engine_cs *ring)
 {
-   if (!intel_ring_initialized(ring))
-   return;
-
-   trace_i915_gem_request_notify(ring);
ring->irq_posted = true; /* paired with mb() in wake_up_process() */
-   intel_engine_wakeup(ring);
+   if (intel_engine_wakeup(ring))
+   trace_i915_gem_request_notify(ring);
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index efca75bcace3..43bb2e0bb949 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -503,7 +503,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
   __entry->ring = ring->id;
   __entry->seqno = i915_gem_request_get_seqno(req);
   __entry->flags = flags;
-  i915_trace_irq_get(ring, req);
+  intel_engine_enable_signaling(req);
   ),
 
TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index d689bd61534e..cf9cbcc2d5d7 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -22,6 +22,8 @@
  *
  */
 
+#include <linux/kthread.h>
+
 #include "i915_drv.h"
 
 static void intel_breadcrumbs_fake_irq(unsigned long data)
@@ -320,10 +322,185 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
(unsigned long)engine);
 }
 
+struct signal {
+   struct rb_node node;
+   struct intel_wait wait;
+   struct drm_i915_gem_request *request;
+};
+
+static bool signal_complete(struct signal *signal)
+{
+   if (signal == NULL)
+   return false;
+
+   /* If another process served as the bottom-half it may have already
+* signalled that this wait is already completed.
+*/
+   if (intel_wait_complete(&signal->wait))
+   return true;
+
+   /* Carefully check if the request is complete, giving time for the
+* seqno to be visible or if the GPU hung.
+*/
+   if (__i915_request_irq_complete(signal->request))
+   return true;
+
+   return false;
+}
+
+static struct signal *to_s

[Intel-gfx] [PATCH 068/190] drm/i915: Unify adding requests between ringbuffer and execlists

2016-01-11 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c |   8 +-
 drivers/gpu/drm/i915/intel_lrc.c|  14 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 129 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  21 +++---
 4 files changed, 87 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ce663acc9c7d..01443d8d9224 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -434,13 +434,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 */
request->postfix = intel_ring_get_tail(ring);
 
-   if (i915.enable_execlists)
-   ret = request->engine->emit_request(request);
-   else {
-   ret = request->engine->add_request(request);
-
-   request->tail = intel_ring_get_tail(ring);
-   }
+   ret = request->engine->add_request(request);
/* Not allowed to fail! */
WARN(ret, "emit|add_request failed: %d!\n", ret);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 30effca91184..9838503fafca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -445,7 +445,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *engine)
if (req0->elsp_submitted) {
/*
 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
-* as we resubmit the request. See gen8_emit_request()
+* as we resubmit the request. See gen8_add_request()
 * for where we prepare the padding after the end of the
 * request.
 */
@@ -1588,7 +1588,7 @@ gen6_seqno_barrier(struct intel_engine_cs *ring)
intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static int gen8_add_request(struct drm_i915_gem_request *request)
 {
struct intel_ring *ring = request->ring;
u32 cmd;
@@ -1782,8 +1782,8 @@ static int logical_render_ring_init(struct drm_device *dev)
ring->init_context = gen8_init_rcs_context;
ring->cleanup = intel_fini_pipe_control;
ring->irq_seqno_barrier = gen6_seqno_barrier;
-   ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush_render;
+   ring->add_request = gen8_add_request;
ring->irq_enable = gen8_logical_ring_enable_irq;
ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
@@ -1828,8 +1828,8 @@ static int logical_bsd_ring_init(struct drm_device *dev)
 
ring->init_hw = gen8_init_common_ring;
ring->irq_seqno_barrier = gen6_seqno_barrier;
-   ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
+   ring->add_request = gen8_add_request;
ring->irq_enable = gen8_logical_ring_enable_irq;
ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
@@ -1852,8 +1852,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
 
ring->init_hw = gen8_init_common_ring;
ring->irq_seqno_barrier = gen6_seqno_barrier;
-   ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
+   ring->add_request = gen8_add_request;
ring->irq_enable = gen8_logical_ring_enable_irq;
ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
@@ -1876,8 +1876,8 @@ static int logical_blt_ring_init(struct drm_device *dev)
 
ring->init_hw = gen8_init_common_ring;
ring->irq_seqno_barrier = gen6_seqno_barrier;
-   ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
+   ring->add_request = gen8_add_request;
ring->irq_enable = gen8_logical_ring_enable_irq;
ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
@@ -1900,8 +1900,8 @@ static int logical_vebox_ring_init(struct drm_device *dev)
 
ring->init_hw = gen8_init_common_ring;
ring->irq_seqno_barrier = gen6_seqno_barrier;
-   ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
+   ring->add_request = gen8_add_request;
ring->irq_enable = gen8_logical_ring_enable_irq;
ring->irq_disable = gen8_logical_ring_disable_irq;
ring->emit_bb_start = gen8_emit_bb_start;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 04f0a77d49cf..556e9e2c1fec 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -59,13 +59,6 @@ int intel_ring_space(struct intel_ring *ringbuf)
return ringbuf->space;
 

[Intel-gfx] [PATCH 022/190] drm/i915: Check the CPU cached value of seqno after waking the waiter

2016-01-11 Thread Chris Wilson
If we have multiple waiters, we may find that many complete on the same
wake up. If we first inspect the seqno from the CPU cache, we may reduce
the number of heavyweight coherent seqno reads we require.
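
The pattern in the abstract (a sketch; the hunk below is the concrete
version):

/* Sketch: cheap, possibly-stale check first; only fall back to the
 * coherent read (with its barrier) when the fast path misses. */
if (i915_gem_request_completed(req))    /* CPU cacheline */
	return true;
engine->irq_seqno_barrier(engine);      /* heavyweight, coherent */
return i915_gem_request_completed(req);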

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fcedcbc50834..c2ee8efdd928 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3632,6 +3632,12 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->ring;
 
+   /* Before we do the heavier coherent read of the seqno,
+* check the value (hopefully) in the CPU cacheline.
+*/
+   if (i915_gem_request_completed(req))
+   return true;
+
/* Ensure our read of the seqno is coherent so that we
 * do not "miss an interrupt" (i.e. if this is the last
 * request and the seqno write from the GPU is not visible
@@ -3643,11 +3649,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 * but it is easier and safer to do it every time the waiter
 * is woken.
 */
-   if (engine->irq_seqno_barrier)
+   if (engine->irq_seqno_barrier) {
engine->irq_seqno_barrier(engine);
-
-   if (i915_gem_request_completed(req))
-   return true;
+   if (i915_gem_request_completed(req))
+   return true;
+   }
 
/* We need to check whether any gpu reset happened in between
 * the request being submitted and now. If a reset has occurred,
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 065/190] drm/i915: Remove obsolete engine->gpu_caches_dirty

2016-01-11 Thread Chris Wilson
Space for flushing the GPU cache prior to completing the request is
preallocated, so emitting that flush cannot fail.
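
A sketch of what the __i915_add_request() path reduces to (the flush
domains are an assumption; the corresponding hunk below is truncated):

/* Sketch: space was reserved at request allocation, so the final
 * flush is emitted unconditionally and cannot run out of space. */
if (flush_caches) {
	ret = request->engine->emit_flush(request,
					  0, I915_GEM_GPU_DOMAINS);
	WARN(ret, "emit_flush failed: %d!\n", ret);
}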

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +---
 drivers/gpu/drm/i915/i915_gem_gtt.c| 18 
 drivers/gpu/drm/i915/i915_gem_request.c|  7 ++-
 drivers/gpu/drm/i915/intel_lrc.c   | 47 +++
 drivers/gpu/drm/i915/intel_lrc.h   |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c| 72 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h|  7 ---
 8 files changed, 39 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 17fe8ed991d6..c078ebc29da5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -534,7 +534,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 * itlb_before_ctx_switch.
 */
if (IS_GEN6(req->i915)) {
-   ret = req->engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
+   ret = req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 731ce13dbdbc..a56fae99a1bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -969,10 +969,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
if (flush_domains & I915_GEM_DOMAIN_GTT)
wmb();
 
-   /* Unconditionally invalidate gpu caches and ensure that we do flush
-* any residual writes from the previous batch.
-*/
-   return intel_engine_invalidate_all_caches(req);
+   /* Unconditionally invalidate gpu caches and TLBs. */
+   return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 static bool
@@ -1138,9 +1136,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 static void
 i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 {
-   /* Unconditionally force add_request to emit a full flush. */
-   params->ring->gpu_caches_dirty = true;
-
/* Add a breadcrumb for the completion of the batch buffer */
__i915_add_request(params->request, params->batch_obj, true);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9a91451d66ac..cddbd8c00663 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1652,9 +1652,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
 
@@ -1690,9 +1690,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
 
@@ -1710,9 +1710,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
/* XXX: RCS is the only one to auto invalidate the TLBs? */
if (req->engine->id != RCS) {
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e1f2af046b6c..e911430575fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -426,10 +426,9 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 * what.
 */
if (flush_caches) {
-   if (i915.enable_execlists)
-   ret = logical_ring_flush_all_caches(request);
-   else
-   ret = intel_engine_flush_all_caches(request);
+   ret = request->engine->emit_flush(request,
+

[Intel-gfx] [PATCH 060/190] drm/i915: Rename backpointer from intel_ringbuffer to intel_engine_cs

2016-01-11 Thread Chris Wilson
Having ringbuf->ring point to an engine is confusing, so rename it once
again, to ringbuf->engine.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 10 +++---
 drivers/gpu/drm/i915/intel_lrc.c   | 35 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 54 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h|  2 +-
 4 files changed, 49 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index e82cc9182dfa..53abe2143f8a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -391,7 +391,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
for (i = 0; i < I915_NUM_RINGS; i++) {
struct guc_execlist_context *lrc = &desc.lrc[i];
struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
-   struct intel_engine_cs *ring;
+   struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
uint64_t ctx_desc;
 
@@ -406,15 +406,15 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
if (!obj)
break;  /* XXX: continue? */
 
-   ring = ringbuf->ring;
-   ctx_desc = intel_lr_context_descriptor(ctx, ring);
+   engine = ringbuf->engine;
+   ctx_desc = intel_lr_context_descriptor(ctx, engine);
lrc->context_desc = (u32)ctx_desc;
 
/* The state page is after PPHWSP */
lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
LRC_STATE_PN * PAGE_SIZE;
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-   (ring->id << GUC_ELC_ENGINE_OFFSET);
+   (engine->id << GUC_ELC_ENGINE_OFFSET);
 
obj = ringbuf->obj;
 
@@ -423,7 +423,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->ring_next_free_location = lrc->ring_begin;
lrc->ring_current_tail_pointer_value = 0;
 
-   desc.engines_used |= (1 << ring->id);
+   desc.engines_used |= (1 << engine->id);
}
 
WARN_ON(desc.engines_used == 0);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 87d325b6e7dc..8639ebfab96f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2179,13 +2179,13 @@ void intel_lr_context_free(struct intel_context *ctx)
if (ctx_obj) {
struct intel_ringbuffer *ringbuf =
ctx->engine[i].ringbuf;
-   struct intel_engine_cs *ring = ringbuf->ring;
+   struct intel_engine_cs *engine = ringbuf->engine;
 
-   if (ctx == ring->default_context) {
+   if (ctx == engine->default_context) {
intel_unpin_ringbuffer_obj(ringbuf);
i915_gem_object_ggtt_unpin(ctx_obj);
}
-   WARN_ON(ctx->engine[ring->id].pin_count);
+   WARN_ON(ctx->engine[engine->id].pin_count);
intel_ringbuffer_free(ringbuf);
drm_gem_object_unreference(&ctx_obj->base);
}
@@ -2261,57 +2261,54 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
  *
  * Return: non-zero on error.
  */
-
 int intel_lr_context_deferred_alloc(struct intel_context *ctx,
-struct intel_engine_cs *ring)
+   struct intel_engine_cs *engine)
 {
-   struct drm_device *dev = ring->dev;
struct drm_i915_gem_object *ctx_obj;
uint32_t context_size;
struct intel_ringbuffer *ringbuf;
int ret;
 
WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
-   WARN_ON(ctx->engine[ring->id].state);
+   WARN_ON(ctx->engine[engine->id].state);
 
-   context_size = round_up(intel_lr_context_size(ring), 4096);
+   context_size = round_up(intel_lr_context_size(engine), 4096);
 
/* One extra page as the sharing data between driver and GuC */
context_size += PAGE_SIZE * LRC_PPHWSP_PN;
 
-   ctx_obj = i915_gem_alloc_object(dev, context_size);
+   ctx_obj = i915_gem_alloc_object(engine->dev, context_size);
if (!ctx_obj) {
DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
return -ENOMEM;
}
 
-   ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
+   ringbuf = intel_engine_create_ringbuffer(engine, 4 * PAGE_SIZE);
if (IS_ERR(ringbuf)) {
ret = PTR_ERR(ringbuf);
goto error_deref_obj;
}
 
-   ret = populate_lr_context(ctx, ctx_obj, rin

[Intel-gfx] [PATCH 046/190] drm/i915: Derive GEM requests from dma-fence

2016-01-11 Thread Chris Wilson
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.

v2: The fence_context needs to be globally unique, not just unique to
this device.
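
A sketch of the fence initialisation implied by v2 (fence_context_alloc()
and fence_init() are the pre-4.10 names of the dma_fence_* API; the
engine fields used here are assumptions):

/* Sketch: one globally-unique timeline (context) per engine. */
engine->fence_context = fence_context_alloc(1);

/* ... and at request creation ... */
fence_init(&req->fence, &i915_fence_ops, &engine->fence_lock,
	   engine->fence_context, seqno);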

Signed-off-by: Chris Wilson 
Cc: Jesse Barnes 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 111 +
 drivers/gpu/drm/i915/i915_gem_request.h|  33 -
 drivers/gpu/drm/i915/i915_gpu_error.c  |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |   2 +-
 drivers/gpu/drm/i915/i915_trace.h  |   2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |   3 +-
 drivers/gpu/drm/i915/intel_lrc.c   |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c|  15 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h|   1 +
 10 files changed, 133 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6172649b7e56..b82482573a8f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -710,7 +710,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, "%x @ %d: %s [%d]\n",
-  req->seqno,
+  req->fence.seqno,
   (int) (jiffies - req->emitted_jiffies),
   task ? task->comm : "",
   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 1c4f4d83a3c2..e366ca0dcd99 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,92 @@
 
 #include "i915_drv.h"
 
+static inline struct drm_i915_gem_request *
+to_i915_request(struct fence *fence)
+{
+   return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+   return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+   return to_i915_request(fence)->ring->name;
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+   return i915_gem_request_completed(to_i915_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+   if (i915_fence_signaled(fence))
+   return false;
+
+   return intel_engine_enable_signaling(to_i915_request(fence)) == 0;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+  bool interruptible,
+  signed long timeout_jiffies)
+{
+   s64 timeout_ns, *timeout;
+   int ret;
+
+   if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+   timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+   timeout = &timeout_ns;
+   } else
+   timeout = NULL;
+
+   ret = __i915_wait_request(to_i915_request(fence),
+ interruptible, timeout,
+ NULL);
+   if (ret == -ETIME)
+   return 0;
+
+   if (ret < 0)
+   return ret;
+
+   if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+   timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+   return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+   snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+ int size)
+{
+   snprintf(str, size, "%u",
+intel_ring_get_seqno(to_i915_request(fence)->ring));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+   struct drm_i915_gem_request *req = to_i915_request(fence);
+   kmem_cache_free(req->i915->requests, req);
+}
+
+static const struct fence_ops i915_fence_ops = {
+   .get_driver_name = i915_fence_get_driver_name,
+   .get_timeline_name = i915_fence_get_timeline_name,
+   .enable_signaling = i915_fence_enable_signaling,
+   .signaled = i915_fence_signaled,
+   .wait = i915_fence_wait,
+   .release = i915_fence_release,
+   .fence_value_str = i915_fence_value_str,
+   .timeline_value_str = i915_fence_timeline_value_str,
+};
+
 static int
 i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
 {
@@ -116,6 +202,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
struct drm_i915_private *dev_priv = to_i915(ring->dev);
unsigned reset_counter = i915_reset_co

[Intel-gfx] [PATCH 016/190] drm/i915: Make queueing the hangcheck work inline

2016-01-11 Thread Chris Wilson
Since the function is a small wrapper around schedule_delayed_work(),
move it inline to remove the function call overhead for the principal
caller.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h | 17 -
 drivers/gpu/drm/i915/i915_irq.c | 16 
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 188bed933f11..201dd330f66a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2709,7 +2709,22 @@ void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
 bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
 
 /* i915_irq.c */
-void i915_queue_hangcheck(struct drm_i915_private *dev_priv);
+static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
+{
+   unsigned long delay;
+
+   if (unlikely(!i915.enable_hangcheck))
+   return;
+
+   /* Don't continually defer the hangcheck so that it is always run at
+* least once after work has been scheduled on any ring. Otherwise,
+* we will ignore a hung ring if a second ring is kept busy.
+*/
+
+   delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
+   schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);
+}
+
 __printf(3, 4)
 void i915_handle_error(struct drm_device *dev, bool wedged,
   const char *fmt, ...);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8939438d747d..2a8a9694eec5 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3173,22 +3173,6 @@ out:
ENABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 }
 
-void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
-{
-   unsigned long delay;
-
-   if (!i915.enable_hangcheck)
-   return;
-
-   /* Don't continually defer the hangcheck so that it is always run at
-* least once after work has been scheduled on any ring. Otherwise,
-* we will ignore a hung ring if a second ring is kept busy.
-*/
-
-   delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
-   schedule_delayed_work(&dev_priv->gpu_error.hangcheck_work, delay);
-}
-
 static void ibx_irq_reset(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 009/190] drm/i915: Tighten reset_counter for reset status

2016-01-11 Thread Chris Wilson
In the reset_counter, we use two bits to track a GPU hang and reset. The
low bit is a "reset-in-progress" flag that we set to signal when we need
to break waiters in order for the recovery task to grab the mutex. As
soon as the recovery task has the mutex, we can clear that flag (which
we do by incrementing the reset_counter, thereby incrementing the global
reset epoch). By clearing that flag when the recovery task holds the
struct_mutex, we can forgo a second flag that simply tells GEM to ignore
the "reset-in-progress" flag.

The second flag we store in the reset_counter is whether the
reset failed and we consider the GPU terminally wedged. Whilst this flag
is set, all access to the GPU (at least through GEM rather than direct mmio
access) is verboten.

PS: Fun is in store, as in the future we want to move from a global
reset epoch to per-engine resets with request recovery.
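
Sketched out, the encoding described above (bit values as used by the
driver; the helpers follow from the description):

#define I915_RESET_IN_PROGRESS_FLAG	1
#define I915_WEDGED			(1 << 31)

/* Low bit: recovery pending; top bit: recovery failed, GPU wedged.
 * Everything in between counts reset epochs. */
static inline bool __i915_reset_in_progress(u32 reset)
{
	return reset & I915_RESET_IN_PROGRESS_FLAG;
}

static inline bool __i915_terminally_wedged(u32 reset)
{
	return reset & I915_WEDGED;
}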

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.c | 39 ++---
 drivers/gpu/drm/i915/i915_drv.h |  3 ---
 drivers/gpu/drm/i915/i915_gem.c | 27 +
 drivers/gpu/drm/i915/i915_irq.c | 21 ++--
 5 files changed, 36 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 932af05b8eec..6ff2d23faaa7 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4696,7 +4696,7 @@ i915_wedged_get(void *data, u64 *val)
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = dev->dev_private;
 
-   *val = i915_reset_counter(&dev_priv->gpu_error);
+   *val = i915_terminally_wedged(&dev_priv->gpu_error);
 
return 0;
 }
@@ -4715,7 +4715,7 @@ i915_wedged_set(void *data, u64 val)
 * while it is writing to 'i915_wedged'
 */
 
-   if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error))
+   if (i915_reset_in_progress(&dev_priv->gpu_error))
return -EAGAIN;
 
intel_runtime_pm_get(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 288fec7691dc..2f03379cdb4b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -873,23 +873,32 @@ int i915_resume_switcheroo(struct drm_device *dev)
 int i915_reset(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
-   bool simulated;
+   struct i915_gpu_error *error = &dev_priv->gpu_error;
+   unsigned reset_counter;
int ret;
 
intel_reset_gt_powersave(dev);
 
mutex_lock(&dev->struct_mutex);
 
-   i915_gem_reset(dev);
+   /* Clear any previous failed attempts at recovery. Time to try again. */
+   atomic_andnot(I915_WEDGED, &error->reset_counter);
 
-   simulated = dev_priv->gpu_error.stop_rings != 0;
+   /* Clear the reset-in-progress flag and increment the reset epoch. */
+   reset_counter = atomic_inc_return(&error->reset_counter);
+   if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
+   ret = -EIO;
+   goto error;
+   }
+
+   i915_gem_reset(dev);
 
ret = intel_gpu_reset(dev);
 
/* Also reset the gpu hangman. */
-   if (simulated) {
+   if (error->stop_rings != 0) {
DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
-   dev_priv->gpu_error.stop_rings = 0;
+   error->stop_rings = 0;
if (ret == -ENODEV) {
DRM_INFO("Reset not implemented, but ignoring "
 "error for simulated gpu hangs\n");
@@ -902,8 +911,7 @@ int i915_reset(struct drm_device *dev)
 
if (ret) {
DRM_ERROR("Failed to reset chip: %i\n", ret);
-   mutex_unlock(&dev->struct_mutex);
-   return ret;
+   goto error;
}
 
intel_overlay_reset(dev_priv);
@@ -922,20 +930,14 @@ int i915_reset(struct drm_device *dev)
 * was running at the time of the reset (i.e. we weren't VT
 * switched away).
 */
-
-   /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset 
*/
-   dev_priv->gpu_error.reload_in_reset = true;
-
ret = i915_gem_init_hw(dev);
-
-   dev_priv->gpu_error.reload_in_reset = false;
-
-   mutex_unlock(&dev->struct_mutex);
if (ret) {
DRM_ERROR("Failed hw init on reset %d\n", ret);
-   return ret;
+   goto error;
}
 
+   mutex_unlock(&dev->struct_mutex);
+
/*
 * rps/rc6 re-init is necessary to restore state lost after the
 * reset and the re-install of gt irqs. Skip for ironlake per
@@ -946,6 +948,11 @@ int i915_reset(struct drm_device *dev)
intel_enable_gt_powersave(dev);
 
return 0;

[Intel-gfx] [PATCH 039/190] drm/i915: Remove stop-rings debugfs interface

2016-01-11 Thread Chris Wilson
Now that we have (near) universal GPU recovery code, we can inject a
real hang from userspace and not need any fakery. Not only does this
mean that the testing is far more realistic, but we can simplify the
kernel in the process.

v2: Replace i915_stop_rings with a dummy implementation, as igt
codified its existence, until we can release an update.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 19 +--
 drivers/gpu/drm/i915/i915_drv.c | 17 ++---
 drivers/gpu/drm/i915/i915_drv.h | 19 ---
 drivers/gpu/drm/i915/i915_gem.c | 13 +++--
 drivers/gpu/drm/i915/intel_lrc.c|  5 -
 drivers/gpu/drm/i915/intel_ringbuffer.c |  8 
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 7 files changed, 6 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 567f8db4c70a..6172649b7e56 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4752,30 +4752,13 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 static int
 i915_ring_stop_get(void *data, u64 *val)
 {
-   struct drm_device *dev = data;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-
-   *val = dev_priv->gpu_error.stop_rings;
-
+   *val = 0;
return 0;
 }
 
 static int
 i915_ring_stop_set(void *data, u64 val)
 {
-   struct drm_device *dev = data;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   int ret;
-
-   DRM_DEBUG_DRIVER("Stopping rings 0x%08llx\n", val);
-
-   ret = mutex_lock_interruptible(&dev->struct_mutex);
-   if (ret)
-   return ret;
-
-   dev_priv->gpu_error.stop_rings = val;
-   mutex_unlock(&dev->struct_mutex);
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 442e1217e442..e9f85fd0542f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -891,24 +891,11 @@ int i915_reset(struct drm_device *dev)
goto error;
}
 
+   pr_notice("drm/i915: Resetting chip after gpu hang\n");
+
i915_gem_reset(dev);
 
ret = intel_gpu_reset(dev);
-
-   /* Also reset the gpu hangman. */
-   if (error->stop_rings != 0) {
-   DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
-   error->stop_rings = 0;
-   if (ret == -ENODEV) {
-   DRM_INFO("Reset not implemented, but ignoring "
-"error for simulated gpu hangs\n");
-   ret = 0;
-   }
-   }
-
-   if (i915_stop_ring_allow_warn(dev_priv))
-   pr_notice("drm/i915: Resetting chip after gpu hang\n");
-
if (ret) {
if (ret != -ENODEV)
DRM_ERROR("Failed to reset chip: %i\n", ret);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9ec6f3e9e74d..c3b795f1566b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1371,13 +1371,6 @@ struct i915_gpu_error {
 */
wait_queue_head_t reset_queue;
 
-   /* Userspace knobs for gpu hang simulation;
-* combines both a ring mask, and extra flags
-*/
-   u32 stop_rings;
-#define I915_STOP_RING_ALLOW_BAN   (1 << 31)
-#define I915_STOP_RING_ALLOW_WARN  (1 << 30)
-
/* For missed irq/seqno simulation. */
unsigned long test_irq_rings;
 };
@@ -3030,18 +3023,6 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
 }
 
-static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
-{
-   return dev_priv->gpu_error.stop_rings == 0 ||
-   dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN;
-}
-
-static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
-{
-   return dev_priv->gpu_error.stop_rings == 0 ||
-   dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN;
-}
-
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3948e85eaa48..ea9344503bf6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2633,21 +2633,14 @@ static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
 {
unsigned long elapsed;
 
-   elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
-
if (ctx->hang_stats.banned)
return true;
 
+   elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
if (ctx->hang_stats.ban_period_seconds &&
elapsed <= ctx->hang_stats.ban_period_seconds) {
-  

[Intel-gfx] [PATCH 024/190] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor

2016-01-11 Thread Chris Wilson
When reading from the HWS page, we use barrier() to prevent the compiler
optimising away the read from the volatile (may be updated by the GPU)
memory address. This is more suited to READ_ONCE(); make it so.
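
The difference between the two idioms, as a generic sketch:

u32 v;

/* Old: a full compiler barrier; clobbers all cached memory state. */
barrier();
v = page[reg];

/* New: a volatile access scoped to this one load. */
v = READ_ONCE(page[reg]);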

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6cc8e9c5f8d6..8f305ce253ae 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -418,8 +418,7 @@ intel_read_status_page(struct intel_engine_cs *ring,
   int reg)
 {
/* Ensure that the compiler doesn't optimize away the load. */
-   barrier();
-   return ring->status_page.page_addr[reg];
+   return READ_ONCE(ring->status_page.page_addr[reg]);
 }
 
 static inline void
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 075/190] drm/i915: Refactor activity tracking for requests

2016-01-11 Thread Chris Wilson
With the introduction of requests, we amplified the number of atomic
refcounted objects we use and update on every execbuffer: from none to
several references, each of which needs to be changed. We also
introduced interesting side-effects in the order of retiring requests
and objects.

Instead of independently tracking the last request for an object, track
the active objects for each request. The object will reside in the
buffer list of its most recent active request and so we reduce the kref
interchange to a list_move. Now retirements are entirely driven by the
request, dramatically simplifying activity tracking on the objects
themselves, and removing the ambiguity between retiring objects and
retiring requests.

All told, less code, simpler and faster, and more extensible.
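
A sketch of the inversion (list and callback names beyond those in the
hunks below are assumptions):

/* Sketch: the request owns the list of objects it keeps busy. */
struct drm_i915_gem_request {
	/* ... */
	struct list_head active_list;
};

/* Retirement walks the request's list instead of each object
 * polling its last request; moving an object to a newer request
 * is just a list_move, not a kref dance. */
static void request_retire_actives(struct drm_i915_gem_request *req)
{
	struct i915_gem_active *active, *next;

	list_for_each_entry_safe(active, next, &req->active_list, link)
		active->retire(active, req);
}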

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile   |   1 -
 drivers/gpu/drm/i915/i915_drv.h |  10 --
 drivers/gpu/drm/i915/i915_gem.c | 160 
 drivers/gpu/drm/i915/i915_gem_debug.c   |  70 --
 drivers/gpu/drm/i915/i915_gem_fence.c   |  10 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  44 +++--
 drivers/gpu/drm/i915/i915_gem_request.h |  16 +++-
 drivers/gpu/drm/i915/intel_lrc.c|   1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  12 ---
 10 files changed, 89 insertions(+), 236 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_debug.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b0a83215db80..79d657f29241 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -23,7 +23,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 i915-y += i915_cmd_parser.o \
  i915_gem_batch_pool.o \
  i915_gem_context.o \
- i915_gem_debug.o \
  i915_gem_dmabuf.o \
  i915_gem_evict.o \
  i915_gem_execbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c577f86d94f8..c9c1a5cdc1e5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -435,8 +435,6 @@ void intel_link_compute_m_n(int bpp, int nlanes,
 #define DRIVER_MINOR   6
 #define DRIVER_PATCHLEVEL  0
 
-#define WATCH_LISTS0
-
 struct opregion_header;
 struct opregion_acpi;
 struct opregion_swsci;
@@ -2024,7 +2022,6 @@ struct drm_i915_gem_object {
struct drm_mm_node *stolen;
struct list_head global_list;
 
-   struct list_head ring_list[I915_NUM_RINGS];
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
@@ -3068,13 +3065,6 @@ static inline bool 
i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
obj->tiling_mode != I915_TILING_NONE;
 }
 
-/* i915_gem_debug.c */
-#if WATCH_LISTS
-int i915_verify_lists(struct drm_device *dev);
-#else
-#define i915_verify_lists(dev) 0
-#endif
-
 /* i915_debugfs.c */
 int i915_debugfs_init(struct drm_minor *minor);
 void i915_debugfs_cleanup(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f314b3ea2726..4eef13ebdaf3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -40,10 +40,6 @@
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object 
*obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object 
*obj);
-static void
-i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
-static void
-i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
  enum i915_cache_level level)
@@ -117,7 +113,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
if (ret)
return ret;
 
-   WARN_ON(i915_verify_lists(dev));
return 0;
 }
 
@@ -1117,27 +1112,14 @@ i915_gem_object_wait_rendering(struct 
drm_i915_gem_object *obj,
return 0;
 
if (readonly) {
-   if (obj->last_write.request != NULL) {
-   ret = i915_wait_request(obj->last_write.request);
-   if (ret)
-   return ret;
-
-   i = obj->last_write.request->engine->id;
-   if (obj->last_read[i].request == 
obj->last_write.request)
-   i915_gem_object_retire__read(obj, i);
-   else
-   i915_gem_object_retire__write(obj);
-   }
+   ret = i915_wait_request(obj->last_write.request);
+   if (ret)
+   return ret;
} else {
for (i = 0; i < I915_NUM_RINGS; i++) {
-   if (obj->last_read[i].request == NULL)
-   continue;
-
ret = i915_wait_reques

[Intel-gfx] [PATCH 078/190] drm/i915: Split early global GTT initialisation

2016-01-11 Thread Chris Wilson
Initialising the global GTT is tricky as we wish to use the drm_mm range
manager during the modesetting initialisation (to capture stolen
allocations from the BIOS) before we actually enable GEM. To overcome
this, we currently set up the drm_mm first and then carefully rebind
those early allocations.
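
The intended ordering after the split, sketched from the call sites in
the diff below (bodies elided, not a complete listing):

int i915_driver_load(struct drm_device *dev, unsigned long flags)
{
        /* ... */
        i915_gem_init_global_gtt(dev);  /* drm_mm usable early, so
                                         * BIOS/stolen ranges can be
                                         * captured during modeset init */
        /* ... */
}

int i915_gem_init(struct drm_device *dev)
{
        /* ... */
        return i915_global_gtt_setup(dev);      /* later: clear unused
                                                 * holes, ready for GEM */
}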

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_dma.c|  2 ++
 drivers/gpu/drm/i915/i915_gem.c|  5 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c| 62 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.h|  1 +
 drivers/gpu/drm/i915/i915_gem_stolen.c | 17 +-
 5 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c0242ce45e43..4a24831a14fa 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -989,6 +989,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long 
flags)
dev_priv->gtt.mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base,
  aperture_size);
 
+   i915_gem_init_global_gtt(dev);
+
/* The i915 workqueue is primarily used for batched retirement of
 * requests (and thus managing bo) once the task has been completed
 * by the GPU. i915_gem_retire_requests() is called directly when we
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e4d7c7f5aca2..44bd514a6c2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4237,7 +4237,9 @@ int i915_gem_init(struct drm_device *dev)
if (ret)
goto out_unlock;
 
-   i915_gem_init_global_gtt(dev);
+   ret = i915_global_gtt_setup(dev);
+   if (ret)
+   goto out_unlock;
 
ret = i915_gem_context_init(dev);
if (ret)
@@ -4312,7 +4314,6 @@ i915_gem_load(struct drm_device *dev)
  SLAB_HWCACHE_ALIGN,
  NULL);
 
-   INIT_LIST_HEAD(&dev_priv->vm_list);
INIT_LIST_HEAD(&dev_priv->context_list);
INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
INIT_LIST_HEAD(&dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6168182a87d8..b5c3bbe6dc2a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2681,10 +2681,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node 
*node,
}
 }
 
-static int i915_gem_setup_global_gtt(struct drm_device *dev,
-u64 start,
-u64 mappable_end,
-u64 end)
+int i915_global_gtt_setup(struct drm_device *dev)
 {
/* Let GEM Manage all of the aperture.
 *
@@ -2697,48 +2694,16 @@ static int i915_gem_setup_global_gtt(struct drm_device 
*dev,
 */
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
-   struct drm_mm_node *entry;
-   struct drm_i915_gem_object *obj;
unsigned long hole_start, hole_end;
+   struct drm_mm_node *entry;
int ret;
 
-   BUG_ON(mappable_end > end);
-
-   ggtt_vm->start = start;
-
-   /* Subtract the guard page before address space initialization to
-* shrink the range used by drm_mm */
-   ggtt_vm->total = end - start - PAGE_SIZE;
-   i915_address_space_init(ggtt_vm, dev_priv);
-   ggtt_vm->total += PAGE_SIZE;
-
if (intel_vgpu_active(dev)) {
ret = intel_vgt_balloon(dev);
if (ret)
return ret;
}
 
-   if (!HAS_LLC(dev))
-   ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
-
-   /* Mark any preallocated objects as occupied */
-   list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-   struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
-
-   DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
- i915_gem_obj_ggtt_offset(obj), obj->base.size);
-
-   WARN_ON(i915_gem_obj_ggtt_bound(obj));
-   ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
-   if (ret) {
-   DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
-   return ret;
-   }
-   vma->bound |= GLOBAL_BIND;
-   __i915_vma_set_map_and_fenceable(vma);
-   list_add_tail(&vma->vm_link, &ggtt_vm->inactive_list);
-   }
-
/* Clear any non-preallocated blocks */
drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2748,7 +2713,9 @@ static int i915_gem_setup_global_gtt(struct drm_device 
*dev,
}
 
/* And finally clear the reserved guard page */
-   ggtt_vm->cle

[Intel-gfx] [PATCH 033/190] drm/i915: Only start retire worker when idle

2016-01-11 Thread Chris Wilson
The retire worker is a low frequency task that makes sure we retire
outstanding requests if userspace is being lax. We only need to start it
once as it remains active until the GPU is idle, so do a cheap test
before the more expensive queue_work(). A consequence of this is that we
need correct locking in the worker to make the hot path of request
submission cheap. To keep the symmetry and keep hangcheck strictly bound
by the GPU's wakelock, we move the cancel_sync(hangcheck) to the idle
worker before dropping the wakelock.

v2: Guard against RCU fouling the breadcrumbs bottom-half whilst we kick
the waiter.
v3: Remove the wakeref assertion squelching (now we hold a wakeref for
the hangcheck, any rpm error there is genuine).

Signed-off-by: Chris Wilson 
References: https://bugs.freedesktop.org/show_bug.cgi?id=88437
---
 drivers/gpu/drm/i915/i915_drv.c|  2 -
 drivers/gpu/drm/i915/i915_drv.h|  2 +-
 drivers/gpu/drm/i915/i915_gem.c| 83 --
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 +++
 drivers/gpu/drm/i915/i915_irq.c| 16 +-
 drivers/gpu/drm/i915/intel_display.c   | 29 ---
 6 files changed, 66 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5160f1414de4..4c090f1cf69c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1490,8 +1490,6 @@ static int intel_runtime_suspend(struct device *device)
i915_gem_release_all_mmaps(dev_priv);
mutex_unlock(&dev->struct_mutex);
 
-   cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-
intel_guc_suspend(dev);
 
intel_suspend_gt_powersave(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7f021505e32f..9ec6f3e9e74d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2987,7 +2987,7 @@ int __must_check i915_gem_set_seqno(struct drm_device 
*dev, u32 seqno);
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
-bool i915_gem_retire_requests(struct drm_device *dev);
+void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ddb2ed0f785..3788fce136f3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2479,6 +2479,37 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
return 0;
 }
 
+static void i915_gem_mark_busy(struct drm_i915_private *dev_priv)
+{
+   if (dev_priv->mm.busy)
+   return;
+
+   intel_runtime_pm_get_noresume(dev_priv);
+
+   i915_update_gfx_val(dev_priv);
+   if (INTEL_INFO(dev_priv)->gen >= 6)
+   gen6_rps_busy(dev_priv);
+
+   queue_delayed_work(dev_priv->wq,
+  &dev_priv->mm.retire_work,
+  round_jiffies_up_relative(HZ));
+
+   dev_priv->mm.busy = true;
+}
+
+static void i915_gem_mark_idle(struct drm_i915_private *dev_priv)
+{
+   dev_priv->mm.busy = false;
+
+   if (cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work))
+   intel_kick_waiters(dev_priv);
+
+   if (INTEL_INFO(dev_priv)->gen >= 6)
+   gen6_rps_idle(dev_priv);
+
+   intel_runtime_pm_put(dev_priv);
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -2559,10 +2590,7 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 
trace_i915_gem_request_add(request);
 
-   queue_delayed_work(dev_priv->wq,
-  &dev_priv->mm.retire_work,
-  round_jiffies_up_relative(HZ));
-   intel_mark_busy(dev_priv->dev);
+   i915_gem_mark_busy(dev_priv);
 
/* Sanity check that the reserved size was large enough. */
intel_ring_reserved_space_end(ringbuf);
@@ -2892,7 +2920,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
WARN_ON(i915_verify_lists(ring->dev));
 }
 
-bool
+void
 i915_gem_retire_requests(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2900,6 +2928,9 @@ i915_gem_retire_requests(struct drm_device *dev)
bool idle = true;
int i;
 
+   if (!dev_priv->mm.busy)
+   return;
+
for_each_ring(ring, dev_priv, i) {
i915_gem_retire_requests_ring(ring);
idle &= list_empty(&ring->request_list);
@@ -2918,8 +2949,6 @@ i915_gem_retire_requests(struct drm_device *dev)
mod_delayed_work(dev_priv->wq,
 &dev_priv->mm.idle_work,
 msecs_to_jiffies(100));
-
-

[Intel-gfx] [PATCH 070/190] drm/i915: Unify legacy/execlists submit_execbuf callbacks

2016-01-11 Thread Chris Wilson
Now that emitting requests is identical between legacy and execlists, we
can use the same function to build up the ring for submitting to either
engine. (With the exception of i915_switch_context(), but in time that
will also be handled gracefully.)

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|  20 -
 drivers/gpu/drm/i915/i915_gem.c|   2 -
 drivers/gpu/drm/i915/i915_gem_context.c|   3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 --
 drivers/gpu/drm/i915/intel_lrc.c   | 129 -
 drivers/gpu/drm/i915/intel_lrc.h   |   4 -
 6 files changed, 20 insertions(+), 162 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0c580124d46d..cae448e238ca 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1655,18 +1655,6 @@ struct i915_virtual_gpu {
bool active;
 };
 
-struct i915_execbuffer_params {
-   struct drm_device   *dev;
-   struct drm_file *file;
-   uint32_tdispatch_flags;
-   uint32_targs_batch_start_offset;
-   uint64_tbatch_obj_vm_offset;
-   struct intel_engine_cs  *ring;
-   struct drm_i915_gem_object  *batch_obj;
-   struct intel_context*ctx;
-   struct drm_i915_gem_request *request;
-};
-
 /* used in computing the new watermarks state */
 struct intel_wm_config {
unsigned int num_pipes_active;
@@ -1934,9 +1922,6 @@ struct drm_i915_private {
 
/* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
struct {
-   int (*execbuf_submit)(struct i915_execbuffer_params *params,
- struct drm_i915_gem_execbuffer2 *args,
- struct list_head *vmas);
int (*init_rings)(struct drm_device *dev);
void (*cleanup_ring)(struct intel_engine_cs *ring);
void (*stop_ring)(struct intel_engine_cs *ring);
@@ -2656,11 +2641,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, 
void *data,
  struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_priv);
-void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-   struct drm_i915_gem_request *req);
-int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
-  struct drm_i915_gem_execbuffer2 *args,
-  struct list_head *vmas);
 int i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv);
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5b5afdcd9634..235a3de6e0a0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4308,12 +4308,10 @@ int i915_gem_init(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
 
if (!i915.enable_execlists) {
-   dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
dev_priv->gt.init_rings = i915_gem_init_rings;
dev_priv->gt.cleanup_ring = intel_engine_cleanup;
dev_priv->gt.stop_ring = intel_engine_stop;
} else {
-   dev_priv->gt.execbuf_submit = intel_execlists_submission;
dev_priv->gt.init_rings = intel_logical_rings_init;
dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
dev_priv->gt.stop_ring = intel_logical_ring_stop;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index c078ebc29da5..72b0875a95a4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -819,8 +819,9 @@ unpin_out:
  */
 int i915_switch_context(struct drm_i915_gem_request *req)
 {
+   if (i915.enable_execlists)
+   return 0;
 
-   WARN_ON(i915.enable_execlists);
WARN_ON(!mutex_is_locked(&req->i915->dev->struct_mutex));
 
if (req->ctx->legacy_hw_ctx.rcs_state == NULL) { /* We have the fake 
context */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3e6384deca65..6dee27224ddb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -41,6 +41,18 @@
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
+struct i915_execbuffer_params {
+   struct drm_device   *dev;
+   struct drm_file *file;
+   uint32_tdispatch_flags;
+   uint32_targs_batch_start_offset;
+   uint64_

[Intel-gfx] [PATCH 014/190] drm/i915: Delay queuing hangcheck to wait-request

2016-01-11 Thread Chris Wilson
We can forgo queuing the hangcheck from the start of every request until
we wait upon a request. This reduces the overhead of every request, but
may increase the latency of detecting a hang. However, if nothing ever
waits upon a hang, did it ever hang? It also improves the robustness of
the wait-request by ensuring that the hangchecker is indeed running
before we sleep indefinitely (and thereby ensuring that we never
actually sleep forever waiting for a dead GPU).

v2: Also queue the hangcheck from retire work in case the GPU becomes
stuck when no one is watching.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 drivers/gpu/drm/i915/i915_gem.c | 13 -
 drivers/gpu/drm/i915/i915_irq.c |  9 -
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bbdb056d2a8e..d9d411919779 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2710,7 +2710,7 @@ void intel_hpd_cancel_work(struct drm_i915_private 
*dev_priv);
 bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
 
 /* i915_irq.c */
-void i915_queue_hangcheck(struct drm_device *dev);
+void i915_queue_hangcheck(struct drm_i915_private *dev_priv);
 __printf(3, 4)
 void i915_handle_error(struct drm_device *dev, bool wedged,
   const char *fmt, ...);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f570990f03e0..b4da8b354a3b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1306,6 +1306,9 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
break;
}
 
+   /* Ensure that even if the GPU hangs, we get woken up. */
+   i915_queue_hangcheck(dev_priv);
+
timer.function = NULL;
if (timeout || missed_irq(dev_priv, ring)) {
unsigned long expire;
@@ -2592,8 +2595,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 
trace_i915_gem_request_add(request);
 
-   i915_queue_hangcheck(ring->dev);
-
queue_delayed_work(dev_priv->wq,
   &dev_priv->mm.retire_work,
   round_jiffies_up_relative(HZ));
@@ -2947,8 +2948,8 @@ i915_gem_retire_requests(struct drm_device *dev)
 
if (idle)
mod_delayed_work(dev_priv->wq,
-  &dev_priv->mm.idle_work,
-  msecs_to_jiffies(100));
+&dev_priv->mm.idle_work,
+msecs_to_jiffies(100));
 
return idle;
 }
@@ -2967,9 +2968,11 @@ i915_gem_retire_work_handler(struct work_struct *work)
idle = i915_gem_retire_requests(dev);
mutex_unlock(&dev->struct_mutex);
}
-   if (!idle)
+   if (!idle) {
+   i915_queue_hangcheck(dev_priv);
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
   round_jiffies_up_relative(HZ));
+   }
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 15973e917566..94f5f4e99446 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3165,18 +3165,17 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
goto out;
}
 
+   /* Reset timer in case GPU hangs without another request being added */
if (busy_count)
-   /* Reset timer case chip hangs without another request
-* being added */
-   i915_queue_hangcheck(dev);
+   i915_queue_hangcheck(dev_priv);
 
 out:
ENABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 }
 
-void i915_queue_hangcheck(struct drm_device *dev)
+void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 {
-   struct i915_gpu_error *e = &to_i915(dev)->gpu_error;
+   struct i915_gpu_error *e = &dev_priv->gpu_error;
 
if (!i915.enable_hangcheck)
return;
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 061/190] drm/i915: Rename intel_context[engine].ringbuf

2016-01-11 Thread Chris Wilson
Perform s/ringbuf/ring/ on the context struct for consistency with the
ring/engine split.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_drv.h|  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  6 +--
 drivers/gpu/drm/i915/intel_lrc.c   | 63 ++
 4 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 018076c89247..6e91726db8d3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1988,7 +1988,7 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
struct drm_i915_gem_object *ctx_obj =
ctx->engine[i].state;
struct intel_ringbuffer *ringbuf =
-   ctx->engine[i].ringbuf;
+   ctx->engine[i].ring;
 
seq_printf(m, "%s: ", ring->name);
if (ctx_obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index baede4517c70..9f06dd19bfb2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -885,7 +885,7 @@ struct intel_context {
/* Execlists */
struct {
struct drm_i915_gem_object *state;
-   struct intel_ringbuffer *ringbuf;
+   struct intel_ringbuffer *ring;
int pin_count;
} engine[I915_NUM_RINGS];
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 53abe2143f8a..b47e630e048a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -390,7 +390,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 
for (i = 0; i < I915_NUM_RINGS; i++) {
struct guc_execlist_context *lrc = &desc.lrc[i];
-   struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
+   struct intel_ringbuffer *ring = ctx->engine[i].ring;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
uint64_t ctx_desc;
@@ -406,7 +406,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
if (!obj)
break;  /* XXX: continue? */
 
-   engine = ringbuf->engine;
+   engine = ring->engine;
ctx_desc = intel_lr_context_descriptor(ctx, engine);
lrc->context_desc = (u32)ctx_desc;
 
@@ -416,7 +416,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
(engine->id << GUC_ELC_ENGINE_OFFSET);
 
-   obj = ringbuf->obj;
+   obj = ring->obj;
 
lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8639ebfab96f..65beb7267d1a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -402,24 +402,24 @@ static void execlists_submit_requests(struct 
drm_i915_gem_request *rq0,
execlists_elsp_write(rq0, rq1);
 }
 
-static void execlists_context_unqueue(struct intel_engine_cs *ring)
+static void execlists_context_unqueue(struct intel_engine_cs *engine)
 {
struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
 
-   assert_spin_locked(&ring->execlist_lock);
+   assert_spin_locked(&engine->execlist_lock);
 
/*
 * If irqs are not active generate a warning as batches that finish
 * without the irqs may get lost and a GPU Hang may occur.
 */
-   WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
+   WARN_ON(!intel_irqs_enabled(engine->dev->dev_private));
 
-   if (list_empty(&ring->execlist_queue))
+   if (list_empty(&engine->execlist_queue))
return;
 
/* Try to read in pairs */
-   list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
+   list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
 execlist_link) {
if (!req0) {
req0 = cursor;
@@ -429,7 +429,7 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
cursor->elsp_submitted = req0->elsp_submitted;
list_del(&req0->execlist_link);
list_add_tail(&req0->execlist_link,
-   &ring->execlist_retired_req_list);
+   &engine->execlist_retired_req_list);
   

[Intel-gfx] [PATCH 034/190] drm/i915: Do not keep postponing the idle-work

2016-01-11 Thread Chris Wilson
Rather than persistently postponing the idle-work every time somebody
calls i915_gem_retire_requests() (potentially ensuring that we never
reach the idle state), queue the work the first time we detect all
requests are complete. Then if, within 100ms, more requests have been
queued, we will abort the idle-worker and wait again until all the new
requests have been completed.
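
The distinction being relied upon is ordinary workqueue semantics, not
something this patch adds: queue_delayed_work() is a no-op while the
work is still pending, so the deadline set when we first went idle
stands, whereas mod_delayed_work() re-arms the timer on every call and
so can postpone the work forever. A sketch:

static void sketch_note_idle(struct workqueue_struct *wq,
                             struct delayed_work *idle_work)
{
        /* no-op if idle_work is already pending: the original expiry
         * is kept and the idle-worker eventually runs */
        queue_delayed_work(wq, idle_work, msecs_to_jiffies(100));

        /* mod_delayed_work(wq, idle_work, msecs_to_jiffies(100)) would
         * instead push the expiry back on every retire, indefinitely */
}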

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3788fce136f3..efd46adb978b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2946,9 +2946,9 @@ i915_gem_retire_requests(struct drm_device *dev)
}
 
if (idle)
-   mod_delayed_work(dev_priv->wq,
-&dev_priv->mm.idle_work,
-msecs_to_jiffies(100));
+   queue_delayed_work(dev_priv->wq,
+  &dev_priv->mm.idle_work,
+  msecs_to_jiffies(100));
 }
 
 static void
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 018/190] drm/i915: Slaughter the thundering i915_wait_request herd

2016-01-11 Thread Chris Wilson
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one.

Ideally, we only want one client to wake up after the interrupt and
check its request for completion. Since the requests must retire in
order, we can select the first client on the oldest request to be woken.
Once that client has completed his wait, we can then wake up the
next client and so on. However, all clients then incur latency as every
process in the chain may be delayed for scheduling - this may also then
cause some priority inversion. To reduce the latency, when a client
is added or removed from the list, we scan the tree for completed
seqno and wake up all the completed waiters in parallel.
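
For reference, the waiter bookkeeping looks roughly like this (a
sketch: only the rb_node is visible in this excerpt, the other fields
are assumed from the description above):

struct intel_wait {
        struct rb_node node;            /* per-engine tree, ordered so
                                         * the oldest seqno is leftmost */
        struct task_struct *tsk;        /* the client to wake */
        u32 seqno;
};

/* Only the first waiter (the bottom-half) is woken by the interrupt;
 * on completing its wait it hands over to the next waiter in the tree.
 */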

Using igt/benchmarks/gem_latency, we can demonstrate this effect. The
benchmark measures the number of GPU cycles between completion of a
batch and the client waking up from a call to wait-ioctl. With many
concurrent waiters, with each on a different request, we observe that
the wakeup latency before the patch scales nearly linearly with the
number of waiters (before external factors kick in making the scaling much
worse). After applying the patch, we can see that only the single waiter
for the request is being woken up, providing a constant wakeup latency
for every operation. However, the situation is not quite as rosy for
many waiters on the same request, though to the best of my knowledge this
is much less likely in practice. Here, we can observe that the
concurrent waiters incur extra latency from being woken up by the
solitary bottom-half, rather than directly by the interrupt. This
appears to be scheduler induced (having discounted adverse effects from
having a rbtree walk/erase in the wakeup path), each additional
wake_up_process() costs approximately 1us on big core. Another effect of
performing the secondary wakeups from the first bottom-half is the
incurred delay this imposes on high priority threads - rather than
immediately returning to userspace and leaving the interrupt handler to
wake the others.

To offset the delay incurred with additional waiters on a request, we
could use a hybrid scheme that did a quick read in the interrupt handler
and dequeued all the completed waiters (incurring the overhead in the
interrupt handler, not the best plan either as we then incur GPU
submission latency) but we would still have to wake up the bottom-half
every time to do the heavyweight slow read. Or we could only kick the
waiters on the seqno with the same priority as the current task (i.e. in
the realtime waiter scenario, only it is woken up immediately by the
interrupt and simply queues the next waiter before returning to userspace,
minimising its delay at the expense of the chain, and also reducing
contention on its scheduler runqueue). This is effective at avoiding long
pauses in the interrupt handler and at avoiding the extra latency in
realtime/high-priority waiters.

v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.
v7: Move the bottom-half from the kthread to the first client process.
v8: Reword a few comments
v9: Break the busy loop when the interrupt is unmasked or has fired.
v10: Comments, unnecessary churn, better debugging from Tvrtko
v11: Wake all completed waiters on removing the current bottom-half to
reduce the latency of waking up a herd of clients all waiting on the
same request.
v12: Rearrange missed-interrupt fault injection so that it works with
igt/drv_missed_irq_hang
v13: Rename intel_breadcrumb and friends to intel_wait in preparation
for signal handling.
v14: RCU commentary, assert_spin_locked
v15: Hide BUG_ON behind the compiler; report on gem_latency findings.
v16: Sort seqno-groups by priority so that first-waiter has the highest
task priority (and so avoid priority inversion).

Testcase: igt/gem_concurrent_blit
Testcase: igt/benchmarks/gem_latency
Signed-off-by: Chris Wilson 
Cc: "Rogozhkin, Dmitry V" 
Cc: "Gong, Zhipeng" 
Cc: Tvrtko Ursulin 
Cc: Dave Gordon 
---
 drivers/gpu/drm/i915/Makefile|   1 +
 drivers/gpu/drm/i915/i915_debugfs.c  |  19 +-
 drivers/gpu/drm/i915/i915_drv.h  |  32 ++-
 drivers/gpu/drm/i915/i915_gem.c  | 141 +
 drivers/gpu/drm/i915/i915_gpu_error.c|   2 +-
 drivers/gpu/drm/i915/i915_irq.c  |  20 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 336 +

[Intel-gfx] [PATCH 083/190] drm/i915: Be more careful when unbinding vma

2016-01-11 Thread Chris Wilson
When we call i915_vma_unbind(), we will wait upon outstanding rendering.
This will also trigger a retirement phase, which may update the object
lists. If we extend request tracking to the VMA itself (rather than
keeping it at the encompassing object), then obj->vma_list may be
modified for other elements upon i915_vma_unbind(). As a result, if we
walk over the object's vma list and call i915_vma_unbind(), we need to
be prepared for that list to change.
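
The hazard, sketched (standard list semantics): list_for_each_entry_safe()
only caches the next element, which protects against deleting the
current entry but not against a retirement re-ordering other entries
behind our back.

/* The unsafe walk this patch replaces: */
list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
        ret = i915_vma_unbind(vma);     /* may retire requests and move
                                         * other vma on obj->vma_list,
                                         * leaving 'next' stale */
        if (ret)
                break;
}

Hence the diff below either pulls each vma onto a private list via
list_first_entry(), or restarts the iteration after every unbind.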

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_gem.c  | 54 
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  6 +---
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  4 +--
 4 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8f5cf244094e..9fa925389332 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2707,6 +2707,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma);
  * _guarantee_ VMA in question is _not in use_ anywhere.
  */
 int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
+
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed3f306af42f..95e69dc47fc8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -254,18 +254,38 @@ static const struct drm_i915_gem_object_ops 
i915_gem_phys_ops = {
.release = i915_gem_object_release_phys,
 };
 
+int
+i915_gem_object_unbind(struct drm_i915_gem_object *obj)
+{
+   struct list_head still_in_list;
+
+   INIT_LIST_HEAD(&still_in_list);
+   while (!list_empty(&obj->vma_list)) {
+   struct i915_vma *vma =
+   list_first_entry(&obj->vma_list,
+struct i915_vma,
+obj_link);
+   int ret;
+
+   list_move_tail(&vma->obj_link, &still_in_list);
+   ret = i915_vma_unbind(vma);
+   if (ret)
+   break;
+   }
+   list_splice(&still_in_list, &obj->vma_list);
+
+   return 0;
+}
+
 static int
 drop_pages(struct drm_i915_gem_object *obj)
 {
-   struct i915_vma *vma, *next;
int ret;
 
drm_gem_object_reference(&obj->base);
-   list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
-   if (i915_vma_unbind(vma))
-   break;
-
-   ret = i915_gem_object_put_pages(obj);
+   ret = i915_gem_object_unbind(obj);
+   if (ret == 0)
+   ret = i915_gem_object_put_pages(obj);
drm_gem_object_unreference(&obj->base);
 
return ret;
@@ -3038,7 +3058,7 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
 {
struct drm_device *dev = obj->base.dev;
-   struct i915_vma *vma, *next;
+   struct i915_vma *vma;
int ret = 0;
 
if (obj->cache_level == cache_level)
@@ -3049,7 +3069,8 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
 * catch the issue of the CS prefetch crossing page boundaries and
 * reading an invalid PTE on older architectures.
 */
-   list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
+restart:
+   list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
 
@@ -3058,11 +3079,18 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
return -EBUSY;
}
 
-   if (!i915_gem_valid_gtt_space(vma, cache_level)) {
-   ret = i915_vma_unbind(vma);
-   if (ret)
-   return ret;
-   }
+   if (i915_gem_valid_gtt_space(vma, cache_level))
+   continue;
+
+   ret = i915_vma_unbind(vma);
+   if (ret)
+   return ret;
+
+   /* As unbinding may affect other elements in the
+* obj->vma_list (due to side-effects from retiring
+* an active vma), play safe and restart the iterator.
+*/
+   goto restart;
}
 
/* We can reuse the existing drm_mm nodes but need to change the
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index fa190ef3f727..e15fc7531f08 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -141,7 +141,6

[Intel-gfx] [PATCH 035/190] drm/i915: Remove redundant queue_delayed_work() from throttle ioctl

2016-01-11 Thread Chris Wilson
We know, by design, that whilst the GPU is active (and thus we are
throttling) the retire_worker is queued. Therefore attempting to requeue
it with queue_delayed_work() is a no-op and we can safely remove it.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index efd46adb978b..e9f5ca7ea835 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4116,9 +4116,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct 
drm_file *file)
return 0;
 
ret = __i915_wait_request(target, true, NULL, NULL);
-   if (ret == 0)
-   queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
-
i915_gem_request_unreference__unlocked(target);
 
return ret;
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 044/190] drm/i915: Move GEM request routines to i915_gem_request.c

2016-01-11 Thread Chris Wilson
Migrate the request operations out of the main body of i915_gem.c and
into their own C file for easier expansion.

v2: Move __i915_add_request() across as well

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/i915_drv.h | 205 +-
 drivers/gpu/drm/i915/i915_gem.c | 652 +--
 drivers/gpu/drm/i915/i915_gem_request.c | 659 
 drivers/gpu/drm/i915/i915_gem_request.h | 223 +++
 5 files changed, 895 insertions(+), 845 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 99ce591c8574..b0a83215db80 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_gtt.o \
  i915_gem.o \
  i915_gem_render_state.o \
+ i915_gem_request.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 57e450e25ad6..ee146ce02412 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@
 #include "intel_lrc.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
+#include "i915_gem_request.h"
 #include 
 #include 
 #include 
@@ -2162,179 +2163,15 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
-void i915_gem_track_fb(struct drm_i915_gem_object *old,
-  struct drm_i915_gem_object *new,
-  unsigned frontbuffer_bits);
-
-/**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable sequence
- * number comparisons on buffer last_read|write_seqno. It also allows an
- * emission time to be associated with the request for tracking how far ahead
- * of the GPU the submission is.
- *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
- */
-struct drm_i915_gem_request {
-   struct kref ref;
-
-   /** On Which ring this request was generated */
-   struct drm_i915_private *i915;
-   struct intel_engine_cs *ring;
-   unsigned reset_counter;
-
-/** GEM sequence number associated with the previous request,
- * when the HWS breadcrumb is equal to this the GPU is processing
- * this request.
- */
-   u32 previous_seqno;
-
-/** GEM sequence number associated with this request,
- * when the HWS breadcrumb is equal or greater than this the GPU
- * has finished processing this request.
- */
-   u32 seqno;
-
-   /** Position in the ringbuffer of the start of the request */
-   u32 head;
-
-   /**
-* Position in the ringbuffer of the start of the postfix.
-* This is required to calculate the maximum available ringbuffer
-* space without overwriting the postfix.
-*/
-u32 postfix;
-
-   /** Position in the ringbuffer of the end of the whole request */
-   u32 tail;
-
-   /**
-* Context and ring buffer related to this request
-* Contexts are refcounted, so when this request is associated with a
-* context, we must increment the context's refcount, to guarantee that
-* it persists while any request is linked to it. Requests themselves
-* are also refcounted, so the request will only be freed when the last
-* reference to it is dismissed, and the code in
-* i915_gem_request_free() will then decrement the refcount on the
-* context.
-*/
-   struct intel_context *ctx;
-   struct intel_ringbuffer *ringbuf;
-
-   /** Batch buffer related to this request if any (used for
-   error state dump only) */
-   struct drm_i915_gem_object *batch_obj;
-
-   /** Time at which this request was emitted, in jiffies. */
-   unsigned long emitted_jiffies;
-
-   /** global list entry for this request */
-   struct list_head list;
-
-   struct drm_i915_file_private *file_priv;
-   /** file_priv list entry for this request */
-   struct list_head client_list;
-
-   /** process identifier submitting this request */
-   struct pid *pid;
-
-   /**
-* The ELSP only accepts two elements at a time, so we queue
-* context/tail pairs on a given queue (ring->execlist_queue) until the
-* hardware is available. The queue serves a double purpose: we also use
-* it to keep track of the up to 2 contexts currently in the hardware
-* (usuall

[Intel-gfx] [PATCH 080/190] drm/i915: Store owning file on the i915_address_space

2016-01-11 Thread Chris Wilson
For the global GTT (and aliasing GTT), the address space is owned by the
device (it is a global resource) and so the per-file owner field is
NULL. For per-process GTT (where we create an address space per
context), each is owned by the opening file. We can use this ownership
information both to distinguish GGTT and ppGTT address spaces and to
occasionally inspect the owner.

v2: Whitespace, tells us who owns i915_address_space
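
A minimal sketch of what the ownership field enables (the helper name
is assumed, not part of this patch):

/* Device-owned address spaces (global GTT, aliasing GTT) have no
 * owning file; per-process GTTs do. */
static inline bool sketch_vm_is_per_process(const struct i915_address_space *vm)
{
        return vm->file != NULL;
}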

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.h |  1 -
 drivers/gpu/drm/i915/i915_gem_context.c |  3 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 27 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.h | 21 ++---
 5 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 99a6181b012e..0d1f470567b0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -352,7 +352,7 @@ static int per_file_stats(int id, void *ptr, void *data)
= container_of(vma->vm,
   struct i915_hw_ppgtt,
   base);
-   if (ppgtt->file_priv != stats->file_priv)
+   if (ppgtt->base.file != stats->file_priv)
continue;
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f840cc55f1ab..0cc3ee589dfb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2913,7 +2913,6 @@ i915_vm_to_ppgtt(struct i915_address_space *vm)
return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
-
 static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 {
return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 05b4e0e85f24..fab702abd1cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -296,7 +296,8 @@ i915_gem_create_context(struct drm_device *dev,
}
 
if (USES_FULL_PPGTT(dev)) {
-   struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
+   struct i915_hw_ppgtt *ppgtt =
+   i915_ppgtt_create(to_i915(dev), file_priv);
 
if (IS_ERR_OR_NULL(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 06117bd0fc00..3a07ff622bd6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2112,11 +2112,12 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
return 0;
 }
 
-static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
+static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
+  struct drm_i915_private *dev_priv)
 {
-   ppgtt->base.dev = dev;
+   ppgtt->base.dev = dev_priv->dev;
 
-   if (INTEL_INFO(dev)->gen < 8)
+   if (INTEL_INFO(dev_priv)->gen < 8)
return gen6_ppgtt_init(ppgtt);
else
return gen8_ppgtt_init(ppgtt);
@@ -2132,15 +2133,17 @@ static void i915_address_space_init(struct 
i915_address_space *vm,
list_add_tail(&vm->global_link, &dev_priv->vm_list);
 }
 
-int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
+int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
+   struct drm_i915_private *dev_priv,
+   struct drm_i915_file_private *file_priv)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   int ret = 0;
+   int ret;
 
-   ret = __hw_ppgtt_init(dev, ppgtt);
+   ret = __hw_ppgtt_init(ppgtt, dev_priv);
if (ret == 0) {
kref_init(&ppgtt->ref);
i915_address_space_init(&ppgtt->base, dev_priv);
+   ppgtt->base.file = file_priv;
}
 
return ret;
@@ -2183,7 +2186,8 @@ int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
 }
 
 struct i915_hw_ppgtt *
-i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
+i915_ppgtt_create(struct drm_i915_private *dev_priv,
+ struct drm_i915_file_private *fpriv)
 {
struct i915_hw_ppgtt *ppgtt;
int ret;
@@ -2192,14 +2196,12 @@ i915_ppgtt_create(struct drm_device *dev, struct 
drm_i915_file_private *fpriv)
if (!ppgtt)
return ERR_PTR(-ENOMEM);
 
-   ret = i915_ppgtt_init(dev, ppgtt);
+   ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
if (ret) {
kfree(ppgtt);
return ERR_PTR(ret);
}
 
-   ppgtt->file_priv = fpriv;
-
trace_i915_ppgtt_create(&ppgtt->base)

[Intel-gfx] [PATCH 077/190] drm/i915: Amalgamate GGTT/ppGTT vma debug list walkers

2016-01-11 Thread Chris Wilson
As we can now have multiple VMA inside the global GTT (with partial
mappings, rotations, etc), it is no longer true that there may just be a
single GGTT entry and so we should walk the full vma_list to count up
the actual usage. In addition to unifying the two walkers, switch from
multiplying the object size for each vma to summing the bound vma sizes.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 46 +++--
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index f311df758195..dd1788c81b90 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -332,6 +332,7 @@ static int per_file_stats(int id, void *ptr, void *data)
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
+   int bound = 0;
 
stats->count++;
stats->total += obj->base.size;
@@ -339,41 +340,30 @@ static int per_file_stats(int id, void *ptr, void *data)
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
 
-   if (USES_FULL_PPGTT(obj->base.dev)) {
-   list_for_each_entry(vma, &obj->vma_list, obj_link) {
-   struct i915_hw_ppgtt *ppgtt;
+   list_for_each_entry(vma, &obj->vma_list, obj_link) {
+   if (!drm_mm_node_allocated(&vma->node))
+   continue;
 
-   if (!drm_mm_node_allocated(&vma->node))
-   continue;
+   bound++;
 
-   if (i915_is_ggtt(vma->vm)) {
-   stats->global += obj->base.size;
-   continue;
-   }
-
-   ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, 
base);
+   if (i915_is_ggtt(vma->vm)) {
+   stats->global += vma->node.size;
+   } else {
+   struct i915_hw_ppgtt *ppgtt
+   = container_of(vma->vm,
+  struct i915_hw_ppgtt,
+  base);
if (ppgtt->file_priv != stats->file_priv)
continue;
-
-   if (obj->active) /* XXX per-vma statistic */
-   stats->active += obj->base.size;
-   else
-   stats->inactive += obj->base.size;
-
-   return 0;
-   }
-   } else {
-   if (i915_gem_obj_ggtt_bound(obj)) {
-   stats->global += obj->base.size;
-   if (obj->active)
-   stats->active += obj->base.size;
-   else
-   stats->inactive += obj->base.size;
-   return 0;
}
+
+   if (obj->active) /* XXX per-vma statistic */
+   stats->active += vma->node.size;
+   else
+   stats->inactive += vma->node.size;
}
 
-   if (!list_empty(&obj->global_list))
+   if (!bound)
stats->unbound += obj->base.size;
 
return 0;
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 053/190] drm/i915: Convert i915_semaphores_is_enabled over to early sanitize

2016-01-11 Thread Chris Wilson
Rather than recomputing whether semaphores are enabled, we can do that
computation once during early initialisation as the i915.semaphores
module parameter is now read-only.
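
The shape of the change, sketched (i915_gem_sanitize_semaphore() is
added in the diff below; the call site here is assumed):

static void sketch_sanitize_options(struct drm_i915_private *dev_priv)
{
        /* resolve the module parameter once during early init; every
         * later query is then a plain read of i915.semaphores */
        i915.semaphores = i915_gem_sanitize_semaphore(dev_priv,
                                                      i915.semaphores);
}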

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  2 +-
 drivers/gpu/drm/i915/i915_dma.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.c | 25 ---
 drivers/gpu/drm/i915/i915_drv.h |  1 -
 drivers/gpu/drm/i915/i915_gem.c | 35 ++---
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +--
 8 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 5335072f2047..387ae77d3c29 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3146,7 +3146,7 @@ static int i915_semaphore_status(struct seq_file *m, void 
*unused)
int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
int i, j, ret;
 
-   if (!i915_semaphore_is_enabled(dev)) {
+   if (!i915.semaphores) {
seq_puts(m, "Semaphores are disabled\n");
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9e49e304dd8e..4c72c83cfa28 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -126,7 +126,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 1;
break;
case I915_PARAM_HAS_SEMAPHORES:
-   value = i915_semaphore_is_enabled(dev);
+   value = i915.semaphores;
break;
case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
value = 1;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e9f85fd0542f..cc831a34f7bb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -515,31 +515,6 @@ void intel_detect_pch(struct drm_device *dev)
pci_dev_put(pch);
 }
 
-bool i915_semaphore_is_enabled(struct drm_device *dev)
-{
-   if (INTEL_INFO(dev)->gen < 6)
-   return false;
-
-   if (i915.semaphores >= 0)
-   return i915.semaphores;
-
-   /* TODO: make semaphores and Execlists play nicely together */
-   if (i915.enable_execlists)
-   return false;
-
-   /* Until we get further testing... */
-   if (IS_GEN8(dev))
-   return false;
-
-#ifdef CONFIG_INTEL_IOMMU
-   /* Enable semaphores on SNB when IO remapping is off */
-   if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
-   return false;
-#endif
-
-   return true;
-}
-
 static void intel_suspend_encoders(struct drm_i915_private *dev_priv)
 {
struct drm_device *dev = dev_priv->dev;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 56cf2ffc1eac..58e9e5e50769 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3226,7 +3226,6 @@ extern void intel_set_memory_cxsr(struct drm_i915_private 
*dev_priv,
 extern void intel_detect_pch(struct drm_device *dev);
 extern int intel_enable_rc6(const struct drm_device *dev);
 
-extern bool i915_semaphore_is_enabled(struct drm_device *dev);
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a4f9c5bbb883..31926a4fb42a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2567,7 +2567,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
if (i915_gem_request_completed(from_req))
return 0;
 
-   if (!i915_semaphore_is_enabled(obj->base.dev)) {
+   if (!i915.semaphores) {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
@@ -4304,13 +4304,42 @@ out:
return ret;
 }
 
+static bool i915_gem_sanitize_semaphore(struct drm_i915_private *dev_priv,
+   int param_value)
+{
+   if (INTEL_INFO(dev_priv)->gen < 6)
+   return false;
+
+   if (param_value >= 0)
+   return param_value;
+
+   /* TODO: make semaphores and Execlists play nicely together */
+   if (i915.enable_execlists)
+   return false;
+
+   /* Until we get further testing... */
+   if (IS_GEN8(dev_priv))
+   return false;
+
+#ifdef CONFIG_INTEL_IOMMU
+   /* Enable semaphores on SNB when IO remapping is off */
+   if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+   return false;
+#endif
+
+   return t

[Intel-gfx] [PATCH 066/190] drm/i915: Simplify request_alloc by returning the allocated request

2016-01-11 Thread Chris Wilson
It is simpler, and leads to more readable code through the callstack, if
the allocation returns the allocated struct through the return value.

The importance of this is that it no longer looks like we accidentally
allocate requests as a side-effect of calling certain functions.
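
The resulting calling convention, sketched (ERR_PTR-style error
reporting is assumed; the full signature is not visible in this
excerpt):

static int sketch_do_work(struct intel_engine_cs *engine,
                          struct intel_context *ctx)
{
        struct drm_i915_gem_request *req;

        req = i915_gem_request_alloc(engine, ctx);
        if (IS_ERR(req))
                return PTR_ERR(req);

        /* the request is an explicit value, not a hidden side-effect;
         * emit commands into req, then submit it */
        return 0;
}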

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|  3 +-
 drivers/gpu/drm/i915/i915_gem.c| 82 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +--
 drivers/gpu/drm/i915/i915_gem_request.c| 22 +++-
 drivers/gpu/drm/i915/i915_gem_request.h|  6 +--
 drivers/gpu/drm/i915/i915_trace.h  | 15 +++---
 drivers/gpu/drm/i915/intel_display.c   | 25 +
 drivers/gpu/drm/i915/intel_lrc.c   |  6 +--
 drivers/gpu/drm/i915/intel_overlay.c   | 24 -
 9 files changed, 77 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 44e8738c5310..0c580124d46d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2786,8 +2786,7 @@ static inline void i915_gem_object_unpin_vmap(struct 
drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-struct intel_engine_cs *to,
-struct drm_i915_gem_request **to_req);
+struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1c6beb154d07..5b5afdcd9634 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2550,47 +2550,35 @@ out:
 
 static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
-  struct intel_engine_cs *to,
-  struct drm_i915_gem_request *from_req,
-  struct drm_i915_gem_request **to_req)
+  struct drm_i915_gem_request *to,
+  struct drm_i915_gem_request *from)
 {
-   struct intel_engine_cs *from;
int ret;
 
-   from = from_req->engine;
-   if (to == from)
+   if (to->engine == from->engine)
return 0;
 
-   if (i915_gem_request_completed(from_req))
+   if (i915_gem_request_completed(from))
return 0;
 
if (!i915.semaphores) {
-   struct drm_i915_private *i915 = from_req->i915;
-   ret = __i915_wait_request(from_req,
- i915->mm.interruptible,
+   ret = __i915_wait_request(from,
+ to->i915->mm.interruptible,
  NULL,
  NO_WAITBOOST);
if (ret)
return ret;
 
-   i915_gem_object_retire_request(obj, from_req);
+   i915_gem_object_retire_request(obj, from);
} else {
-   int idx = intel_engine_sync_index(from, to);
-   u32 seqno = i915_gem_request_get_seqno(from_req);
+   int idx = intel_engine_sync_index(from->engine, to->engine);
+   u32 seqno = i915_gem_request_get_seqno(from);
 
-   WARN_ON(!to_req);
-
-   if (seqno <= from->semaphore.sync_seqno[idx])
+   if (seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
 
-   if (*to_req == NULL) {
-   ret = i915_gem_request_alloc(to, to->default_context, 
to_req);
-   if (ret)
-   return ret;
-   }
-
-   trace_i915_gem_ring_sync_to(*to_req, from, from_req);
-   ret = to->semaphore.sync_to(*to_req, from, seqno);
+   trace_i915_gem_ring_sync_to(to, from);
+   ret = to->engine->semaphore.sync_to(to, from->engine, seqno);
if (ret)
return ret;
 
@@ -2598,8 +2586,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 * might have just caused seqno wrap under
 * the radar.
 */
-   from->semaphore.sync_seqno[idx] =
-   
i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+   from->engine->semaphore.sync_seqno[idx] =
+   
i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]);
}
 
return 0;
@@ -2609,17 +2597,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * i915_gem_object_sync - sync an object to a ring.
  *
  * @obj: object which may be in use on another ring.
- * @to: ring we wish to use the object on. May be NULL.
- * @to_req: request we wish

[Intel-gfx] [PATCH 057/190] drm/i915: Remove the identical implementations of request space reservation

2016-01-11 Thread Chris Wilson
Now that we share intel_ring_begin(), reserving space for the tail of
the request is identical between legacy/execlists and so the tautology
can be removed.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c |  7 +++
 drivers/gpu/drm/i915/intel_lrc.c| 15 ---
 drivers/gpu/drm/i915/intel_lrc.h|  1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ---
 5 files changed, 3 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 619a9b063d9c..85067069995e 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -255,10 +255,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 * to be redone if the request is not actually submitted straight
 * away, e.g. because a GPU scheduler has deferred it.
 */
-   if (i915.enable_execlists)
-   ret = intel_logical_ring_reserve_space(req);
-   else
-   ret = intel_ring_reserve_space(req);
+   intel_ring_reserved_space_reserve(req->ringbuf,
+ MIN_SPACE_FOR_ADD_REQUEST);
+   ret = intel_ring_begin(req, 0);
if (ret) {
/*
 * At this point, the request is fully allocated even if not
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3d14b69632e8..4f1944929330 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -721,21 +721,6 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
execlists_context_queue(request);
 }
 
-int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
-{
-   /*
-* The first call merely notes the reserve request and is common for
-* all back ends. The subsequent localised _begin() call actually
-* ensures that the reservation is available. Without the begin, if
-* the request creator immediately submitted the request without
-* adding any commands to it then there might not actually be
-* sufficient room for the submission commands.
-*/
-   intel_ring_reserved_space_reserve(request->ringbuf, 
MIN_SPACE_FOR_ADD_REQUEST);
-
-   return intel_ring_begin(request, 0);
-}
-
 /**
  * execlists_submission() - submit a batchbuffer for execution, Execlists style
  * @dev: DRM device.
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 32401e11cebe..c88988a41898 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -56,7 +56,6 @@
 
 /* Logical Rings */
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request 
*request);
-int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request);
 void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c694f602a0b8..db5c407f7720 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2086,21 +2086,6 @@ int intel_ring_alloc_request_extras(struct 
drm_i915_gem_request *request)
return 0;
 }
 
-int intel_ring_reserve_space(struct drm_i915_gem_request *request)
-{
-   /*
-* The first call merely notes the reserve request and is common for
-* all back ends. The subsequent localised _begin() call actually
-* ensures that the reservation is available. Without the begin, if
-* the request creator immediately submitted the request without
-* adding any commands to it then there might not actually be
-* sufficient room for the submission commands.
-*/
-   intel_ring_reserved_space_reserve(request->ringbuf, 
MIN_SPACE_FOR_ADD_REQUEST);
-
-   return intel_ring_begin(request, 0);
-}
-
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int 
size)
 {
WARN_ON(ringbuf->reserved_size);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9c19a6ca8e7d..bc6ceb54b1f3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -536,9 +536,6 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer 
*ringbuf);
 /* Finish with the reserved space - for use by i915_add_request() only. */
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
 
-/* Legacy ringbuffer specific portion of reservation code: */
-int intel_ring_reserve_space(struct drm_i915_gem_request *request);
-
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 struct intel_wait {
struct rb_node node;
-- 
2.7.0.rc3

[Intel-gfx] [PATCH 071/190] drm/i915: Simplify calling engine->sync_to

2016-01-11 Thread Chris Wilson
Since requests can no longer be generated as a side-effect of
intel_ring_begin(), we know that the seqno will be unchanged during
ring-emission. This predictability then means we do not have to check
for the seqno wrapping around whilst emitting the semaphore for
engine->sync_to().
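
A sketch of the simplified caller, condensed from the
__i915_gem_object_sync() hunk below (error handling trimmed):

	if (from->fence.seqno > from->engine->semaphore.sync_seqno[idx]) {
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret == 0)
			from->engine->semaphore.sync_seqno[idx] =
				from->fence.seqno;
	}

The seqno is read straight from the signalling request; no post-emission
re-read is required since emission can no longer move it.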

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 13 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 67 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 +--
 3 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 235a3de6e0a0..b0230e7151ce 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2572,22 +2572,15 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
i915_gem_object_retire_request(obj, from);
} else {
int idx = intel_engine_sync_index(from->engine, to->engine);
-   u32 seqno = i915_gem_request_get_seqno(from);
-
-   if (seqno <= from->engine->semaphore.sync_seqno[idx])
+   if (from->fence.seqno <= 
from->engine->semaphore.sync_seqno[idx])
return 0;
 
trace_i915_gem_ring_sync_to(to, from);
-   ret = to->engine->semaphore.sync_to(to, from->engine, seqno);
+   ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
 
-   /* We use last_read_req because sync_to()
-* might have just caused seqno wrap under
-* the radar.
-*/
-   from->engine->semaphore.sync_seqno[idx] =
-   
i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]);
+   from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
}
 
return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 556e9e2c1fec..d37cdb2f9073 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1384,69 +1384,58 @@ static inline bool i915_gem_has_seqno_wrapped(struct 
drm_i915_private *dev_priv,
  */
 
 static int
-gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
-  struct intel_engine_cs *signaller,
-  u32 seqno)
+gen8_ring_sync(struct drm_i915_gem_request *wait,
+  struct drm_i915_gem_request *signal)
 {
-   struct intel_ring *waiter = waiter_req->ring;
-   struct drm_i915_private *dev_priv = waiter_req->i915;
+   struct intel_ring *waiter = wait->ring;
+   struct drm_i915_private *dev_priv = wait->i915;
int ret;
 
-   ret = intel_ring_begin(waiter_req, 4);
+   ret = intel_ring_begin(wait, 4);
if (ret)
return ret;
 
-   intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
-   MI_SEMAPHORE_GLOBAL_GTT |
-   MI_SEMAPHORE_POLL |
-   MI_SEMAPHORE_SAD_GTE_SDD);
-   intel_ring_emit(waiter, seqno);
intel_ring_emit(waiter,
-   lower_32_bits(GEN8_WAIT_OFFSET(waiter_req->engine,
-  signaller->id)));
+   MI_SEMAPHORE_WAIT |
+   MI_SEMAPHORE_GLOBAL_GTT |
+   MI_SEMAPHORE_POLL |
+   MI_SEMAPHORE_SAD_GTE_SDD);
+   intel_ring_emit(waiter, signal->fence.seqno);
intel_ring_emit(waiter,
-   upper_32_bits(GEN8_WAIT_OFFSET(waiter_req->engine,
-  signaller->id)));
+   lower_32_bits(GEN8_WAIT_OFFSET(wait->engine,
+  signal->engine->id)));
+   intel_ring_emit(waiter,
+   upper_32_bits(GEN8_WAIT_OFFSET(wait->engine,
+  signal->engine->id)));
intel_ring_advance(waiter);
return 0;
 }
 
 static int
-gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
-  struct intel_engine_cs *signaller,
-  u32 seqno)
+gen6_ring_sync(struct drm_i915_gem_request *wait,
+  struct drm_i915_gem_request *signal)
 {
-   struct intel_ring *waiter = waiter_req->ring;
+   struct intel_ring *waiter = wait->ring;
u32 dw1 = MI_SEMAPHORE_MBOX |
  MI_SEMAPHORE_COMPARE |
  MI_SEMAPHORE_REGISTER;
-   u32 wait_mbox = signaller->semaphore.mbox.wait[waiter_req->engine->id];
+   u32 wait_mbox = signal->engine->semaphore.mbox.wait[wait->engine->id];
int ret;
 
-   /* Throughout all of the GEM code, seqno passed implies our current
-* seqno is >= the last seqno executed. However for hardware the
-* comparison is strictly greater than.
-*/
-  

[Intel-gfx] [PATCH 074/190] drm/i915: Rename request->list to link for consistency

2016-01-11 Thread Chris Wilson
We use "list" to denote the list and "link" to denote an element on that
list. Rename request->list to match this idiom.
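
The idiom in miniature (illustrative structures, not the driver's own
definitions):

#include <linux/list.h>

struct engine_example {
	struct list_head request_list;	/* the list... */
};

struct request_example {
	struct list_head link;		/* ...and one element's node on it */
};

static void walk_requests(struct engine_example *engine)
{
	struct request_example *req;

	/* Each request is reached through its req->link node. */
	list_for_each_entry(req, &engine->request_list, link)
		;
}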

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c | 12 ++--
 drivers/gpu/drm/i915/i915_gem_request.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_request.h |  4 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c   |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 +++---
 6 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 65cb1d6a5d64..efa9572fc217 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -695,13 +695,13 @@ static int i915_gem_request_info(struct seq_file *m, void 
*data)
int count;
 
count = 0;
-   list_for_each_entry(req, &ring->request_list, list)
+   list_for_each_entry(req, &ring->request_list, link)
count++;
if (count == 0)
continue;
 
seq_printf(m, "%s requests: %d\n", ring->name, count);
-   list_for_each_entry(req, &ring->request_list, list) {
+   list_for_each_entry(req, &ring->request_list, link) {
struct task_struct *task;
 
rcu_read_lock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 77c253ddf060..f314b3ea2726 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2183,7 +2183,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 * extra delay for a recent interrupt is pointless. Hence, we do
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
-   list_for_each_entry(request, &ring->request_list, list) {
+   list_for_each_entry(request, &ring->request_list, link) {
if (i915_gem_request_completed(request))
continue;
 
@@ -2208,7 +2208,7 @@ static void i915_gem_reset_ring_status(struct 
intel_engine_cs *ring)
 
i915_set_reset_status(dev_priv, request->ctx, ring_hung);
 
-   list_for_each_entry_continue(request, &ring->request_list, list)
+   list_for_each_entry_continue(request, &ring->request_list, link)
i915_set_reset_status(dev_priv, request->ctx, false);
 }
 
@@ -2255,7 +2255,7 @@ static void i915_gem_reset_ring_cleanup(struct 
intel_engine_cs *engine)
 
request = list_last_entry(&engine->request_list,
  struct drm_i915_gem_request,
- list);
+ link);
 
i915_gem_request_retire_upto(request);
}
@@ -2317,7 +2317,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
 
request = list_first_entry(&ring->request_list,
   struct drm_i915_gem_request,
-  list);
+  link);
 
if (!i915_gem_request_completed(request))
break;
@@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*ring)
  struct drm_i915_gem_object,
  ring_list[ring->id]);
 
-   if (!list_empty(&obj->last_read[ring->id].request->list))
+   if (!list_empty(&obj->last_read[ring->id].request->link))
break;
 
i915_gem_object_retire__read(obj, ring->id);
@@ -2449,7 +2449,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object 
*obj)
if (req == NULL)
continue;
 
-   if (list_empty(&req->list))
+   if (list_empty(&req->link))
goto retire;
 
if (i915_gem_request_completed(req)) {
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 01443d8d9224..7f38d8972721 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -333,7 +333,7 @@ void i915_gem_request_cancel(struct drm_i915_gem_request 
*req)
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
-   list_del_init(&request->list);
+   list_del_init(&request->link);
 
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
@@ -355,12 +355,12 @@ i915_gem_request_retire_upto(struct drm_i915_gem_request 
*req)
 
lockdep_assert_held(&engine->dev->struct_mutex);
 
-   if (list_empty(&req->list))
+   if (list_empty(&req->link))
return;
 
do {
tmp = list_fi

[Intel-gfx] [PATCH 062/190] drm/i915: Rename extern functions operating on intel_engine_cs

2016-01-11 Thread Chris Wilson
Using intel_ring_* to refer to the intel_engine_cs functions is most
confusing!
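
In summary, the renames applied by the hunks below are:

/*
 *	intel_ring_initialized()	 -> intel_engine_initialized()
 *	intel_ring_get_seqno()		 -> intel_engine_get_seqno()
 *	intel_ring_get_active_head()	 -> intel_engine_get_active_head()
 *	enum intel_ring_id		 -> enum intel_engine_id
 *	enum intel_ring_hangcheck_action -> enum intel_engine_hangcheck_action
 */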

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 10 +++
 drivers/gpu/drm/i915/i915_dma.c|  8 +++---
 drivers/gpu/drm/i915/i915_drv.h|  4 +--
 drivers/gpu/drm/i915/i915_gem.c| 22 +++---
 drivers/gpu/drm/i915/i915_gem_context.c|  8 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 ++--
 drivers/gpu/drm/i915/i915_gem_request.c|  8 +++---
 drivers/gpu/drm/i915/i915_gem_request.h|  4 +--
 drivers/gpu/drm/i915/i915_gpu_error.c  |  8 +++---
 drivers/gpu/drm/i915/i915_guc_submission.c |  6 ++--
 drivers/gpu/drm/i915/i915_irq.c| 18 ++--
 drivers/gpu/drm/i915/i915_trace.h  |  2 +-
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  4 +--
 drivers/gpu/drm/i915/intel_lrc.c   | 17 +--
 drivers/gpu/drm/i915/intel_mocs.c  |  6 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 46 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h| 36 +++
 17 files changed, 104 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 6e91726db8d3..dec10784c2bc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -599,7 +599,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
   engine->name,
   
i915_gem_request_get_seqno(work->flip_queued_req),
   dev_priv->next_seqno,
-  intel_ring_get_seqno(engine),
+  intel_engine_get_seqno(engine),
   
i915_gem_request_completed(work->flip_queued_req));
} else
seq_printf(m, "Flip not associated with any 
ring\n");
@@ -732,7 +732,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
struct rb_node *rb;
 
seq_printf(m, "Current sequence (%s): %x\n",
-  ring->name, intel_ring_get_seqno(ring));
+  ring->name, intel_engine_get_seqno(ring));
 
seq_printf(m, "Current user interrupts (%s): %x\n",
   ring->name, READ_ONCE(ring->user_interrupts));
@@ -1354,8 +1354,8 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_ring(ring, dev_priv, i) {
-   acthd[i] = intel_ring_get_active_head(ring);
-   seqno[i] = intel_ring_get_seqno(ring);
+   acthd[i] = intel_engine_get_active_head(ring);
+   seqno[i] = intel_engine_get_seqno(ring);
}
 
i915_get_extra_instdone(dev, instdone);
@@ -2496,7 +2496,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
struct intel_guc guc;
struct i915_guc_client client = {};
struct intel_engine_cs *ring;
-   enum intel_ring_id i;
+   enum intel_engine_id i;
u64 total = 0;
 
if (!HAS_GUC_SCHED(dev_priv->dev))
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4c72c83cfa28..c0242ce45e43 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -87,16 +87,16 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 1;
break;
case I915_PARAM_HAS_BSD:
-   value = intel_ring_initialized(&dev_priv->ring[VCS]);
+   value = intel_engine_initialized(&dev_priv->ring[VCS]);
break;
case I915_PARAM_HAS_BLT:
-   value = intel_ring_initialized(&dev_priv->ring[BCS]);
+   value = intel_engine_initialized(&dev_priv->ring[BCS]);
break;
case I915_PARAM_HAS_VEBOX:
-   value = intel_ring_initialized(&dev_priv->ring[VECS]);
+   value = intel_engine_initialized(&dev_priv->ring[VECS]);
break;
case I915_PARAM_HAS_BSD2:
-   value = intel_ring_initialized(&dev_priv->ring[VCS2]);
+   value = intel_engine_initialized(&dev_priv->ring[VCS2]);
break;
case I915_PARAM_HAS_RELAXED_FENCING:
value = 1;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9f06dd19bfb2..466adc6617f0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -520,7 +520,7 @@ struct drm_i915_error_state {
/* Software tracked state */
bool waiting;
int hangcheck_score;
-   enum intel_ring_hangcheck_action hangcheck_action;
+   enum intel_engine_hangcheck_action hangcheck_action;
int num_requests;
 
/* our own tracking

[Intel-gfx] [PATCH 025/190] drm/i915: Broadwell execlists needs exactly the same seqno w/a as legacy

2016-01-11 Thread Chris Wilson
In legacy mode, we use the gen6 seqno barrier to insert a delay after
the interrupt before reading the seqno (as the seqno write is not
flushed before the interrupt is sent, the interrupt arrives before the
seqno is visible). Execlists ignored the evidence of igt.

Note that it is harder, but not impossible, to reproduce the missed
interrupt syndrome with execlists. This is primarily because execlists
itself being interrupt driven helps mask the issue.
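
The barrier itself is tiny (lifted from the hunk below): a posting read
from a CS register buys the seqno write enough time to land before the
status page is read.

static void
gen6_seqno_barrier(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->i915;

	/* Stall for roughly one memory transaction... */
	POSTING_READ_FW(RING_ACTHD(ring->mmio_base));

	/* ...then drop the now-stale status-page cacheline. */
	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
}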

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 39 +--
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ad51b1fc37cd..27d91f1ceb2b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1775,18 +1775,24 @@ static int gen8_emit_flush_render(struct 
drm_i915_gem_request *request,
return 0;
 }
 
-static void bxt_seqno_barrier(struct intel_engine_cs *ring)
+static void
+gen6_seqno_barrier(struct intel_engine_cs *ring)
 {
-   /*
-* On BXT A steppings there is a HW coherency issue whereby the
-* MI_STORE_DATA_IMM storing the completed request's seqno
-* occasionally doesn't invalidate the CPU cache. Work around this by
-* clflushing the corresponding cacheline whenever the caller wants
-* the coherency to be guaranteed. Note that this cacheline is known
-* to be clean at this point, since we only write it in
-* bxt_a_set_seqno(), where we also do a clflush after the write. So
-* this clflush in practice becomes an invalidate operation.
+   /* Workaround to force correct ordering between irq and seqno writes on
+* ivb (and maybe also on snb) by reading from a CS register (like
+* ACTHD) before reading the status page.
+*
+*   Note that this effectively stalls the read by the time
+* it takes to do a memory transaction, which more or less ensures
+* that the write from the GPU has sufficient time to invalidate
+* the CPU cacheline. Alternatively we could delay the interrupt from
+* the CS ring to give the write time to land, but that would incur
+* a delay after every batch i.e. much more frequent than a delay
+* when waiting for the interrupt (with the same net latency).
 */
+   struct drm_i915_private *dev_priv = ring->i915;
+   POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
+
intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
@@ -1984,8 +1990,7 @@ static int logical_render_ring_init(struct drm_device 
*dev)
ring->init_hw = gen8_init_render_ring;
ring->init_context = gen8_init_rcs_context;
ring->cleanup = intel_fini_pipe_control;
-   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
-   ring->irq_seqno_barrier = bxt_seqno_barrier;
+   ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush_render;
ring->irq_get = gen8_logical_ring_get_irq;
@@ -2031,8 +2036,7 @@ static int logical_bsd_ring_init(struct drm_device *dev)
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
 
ring->init_hw = gen8_init_common_ring;
-   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
-   ring->irq_seqno_barrier = bxt_seqno_barrier;
+   ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
ring->irq_get = gen8_logical_ring_get_irq;
@@ -2056,6 +2060,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
 
ring->init_hw = gen8_init_common_ring;
+   ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
ring->irq_get = gen8_logical_ring_get_irq;
@@ -2079,8 +2084,7 @@ static int logical_blt_ring_init(struct drm_device *dev)
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
 
ring->init_hw = gen8_init_common_ring;
-   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
-   ring->irq_seqno_barrier = bxt_seqno_barrier;
+   ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
ring->irq_get = gen8_logical_ring_get_irq;
@@ -2104,8 +2108,7 @@ static int logical_vebox_ring_init(struct drm_device *dev)
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
 
ring->init_hw = gen8_init_common_ring;
-   if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
-   ring->irq_seqno_barrier = bxt_seqno_barrier;
+   ring->irq_seqno_barrier = gen6_seqno_barrier;
ring->emit_request = gen8_emit_request;
ring->emit_flush = gen8_emit_flush;
ring

[Intel-gfx] [PATCH 042/190] drm/i915: Clean up GPU hang message

2016-01-11 Thread Chris Wilson
Remove some redundant kernel messages as we deduce a hung GPU and
capture the error state.

v2: Fix "hang" vs "no progress" message whilst I was there

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d9757d227c86..ce52d7d9ad91 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3031,8 +3031,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
struct drm_device *dev = dev_priv->dev;
struct intel_engine_cs *ring;
int i;
-   int busy_count = 0, rings_hung = 0;
-   bool stuck[I915_NUM_RINGS] = { 0 };
+   int busy_count = 0;
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
@@ -3108,7 +3107,6 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
break;
case HANGCHECK_HUNG:
ring->hangcheck.score += HUNG;
-   stuck[i] = true;
break;
}
}
@@ -3134,17 +3132,12 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
busy_count += busy;
}
 
-   for_each_ring(ring, dev_priv, i) {
-   if (ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
-   DRM_INFO("%s on %s\n",
-stuck[i] ? "stuck" : "no progress",
-ring->name);
-   rings_hung++;
-   }
-   }
-
-   if (rings_hung)
-   return i915_handle_error(dev, true, "Ring hung");
+   for_each_ring(ring, dev_priv, i)
+   if (ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG)
+   return i915_handle_error(dev, true,
+"%s on %s",
+ring->hangcheck.action == 
HANGCHECK_HUNG ? "Hang" : "No progress" ,
+ring->name);
 
/* Reset timer in case GPU hangs without another request being added */
if (busy_count)
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 081/190] drm/i915: i915_vma_move_to_active prep patch

2016-01-11 Thread Chris Wilson
This patch is broken out of the next just to remove the code motion from
that patch and make it more readable. What we do here is move the
i915_vma_move_to_active() to i915_gem_execbuffer.c and put the three
stages (read, write, fenced) together so that future modifications to
active handling are all located in the same spot. The importance of this
is so that we can more simply control the order in which the requests
are placed in the retirement list (i.e. control the order in which we
retire and so control the lifetimes to avoid having to hold onto
references).
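
The visible interface change is just the extra flags parameter (from the
i915_drv.h hunk below), through which the caller describes how the vma is
being used so that read, write and fence tracking can live in one place:

void i915_vma_move_to_active(struct i915_vma *vma,
			     struct drm_i915_gem_request *req,
			     unsigned flags);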

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |  3 +-
 drivers/gpu/drm/i915/i915_gem.c  | 15 ---
 drivers/gpu/drm/i915/i915_gem_context.c  |  7 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 63 ++--
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 5 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0cc3ee589dfb..aa9d3782107e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2764,7 +2764,8 @@ int __must_check i915_mutex_lock_interruptible(struct 
drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
-struct drm_i915_gem_request *req);
+struct drm_i915_gem_request *req,
+unsigned flags);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 struct drm_device *dev,
 struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9a22fdd8a9f5..164ebdaa0369 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2026,21 +2026,6 @@ void *i915_gem_object_pin_vmap(struct 
drm_i915_gem_object *obj)
return obj->vmapping;
 }
 
-void i915_vma_move_to_active(struct i915_vma *vma,
-struct drm_i915_gem_request *req)
-{
-   struct drm_i915_gem_object *obj = vma->obj;
-   struct intel_engine_cs *engine = req->engine;
-
-   /* Add a reference if we're newly entering the active list. */
-   if (obj->active == 0)
-   drm_gem_object_reference(&obj->base);
-   obj->active |= intel_engine_flag(engine);
-
-   i915_gem_request_mark_active(req, &obj->last_read[engine->id]);
-   list_move_tail(&vma->vm_link, &vma->vm->active_list);
-}
-
 static void
 i915_gem_object_retire__fence(struct i915_gem_active *active,
  struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index fab702abd1cb..310a770b7984 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -778,8 +778,8 @@ static int do_switch(struct drm_i915_gem_request *req)
 * MI_SET_CONTEXT instead of when the next seqno has completed.
 */
if (from != NULL) {
-   from->legacy_hw_ctx.rcs_state->base.read_domains = 
I915_GEM_DOMAIN_INSTRUCTION;
-   
i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), 
req);
+   struct drm_i915_gem_object *obj = from->legacy_hw_ctx.rcs_state;
+
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 * whole damn pipeline, we don't need to explicitly mark the
 * object dirty. The only exception is that the context must be
@@ -787,7 +787,8 @@ static int do_switch(struct drm_i915_gem_request *req)
 * able to defer doing this until we know the object would be
 * swapped, but there is no way to do that yet.
 */
-   from->legacy_hw_ctx.rcs_state->dirty = 1;
+   obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+   i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
 
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c10795f58bfc..9e549bded186 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1104,6 +1104,44 @@ i915_gem_validate_context(struct drm_device *dev, struct 
drm_file *file,
return ctx;
 }
 
+void i915_vma_move_to_active(struct i915_vma *vma,
+struct drm_i915_gem_request *req,
+unsigned flags)
+{
+   struct drm_i915_gem_object *obj = vma->obj;
+   const unsigned engine = req->engine->id;
+
+   GEM_BUG_ON(!drm_mm_node_allocated(&vma->node))

[Intel-gfx] [PATCH 079/190] drm/i915: Reduce the pointer dance of i915_is_ggtt()

2016-01-11 Thread Chris Wilson
The multiple levels of indirection do nothing but hinder the compiler,
and the pointer chasing turns out to be quite painful but painless to fix.
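
The transformation at each call site is mechanical; a sketch (the helper
name is illustrative):

static bool vma_is_global(const struct i915_vma *vma)
{
	/* Was: i915_is_ggtt(vma->vm), i.e. vm -> dev -> dev_private ->
	 * gtt.base, three dependent loads to answer a question that is
	 * fixed at vma creation. Now a single cached bit. */
	return vma->is_ggtt;
}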

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 13 ++---
 drivers/gpu/drm/i915/i915_drv.h|  7 ---
 drivers/gpu/drm/i915/i915_gem.c| 18 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.c| 12 +---
 drivers/gpu/drm/i915/i915_gem_gtt.h|  5 +
 drivers/gpu/drm/i915/i915_trace.h  | 27 ---
 7 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index dd1788c81b90..99a6181b012e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -118,7 +118,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct 
drm_i915_gem_object *obj)
struct i915_vma *vma;
 
list_for_each_entry(vma, &obj->vma_list, obj_link) {
-   if (i915_is_ggtt(vma->vm) && drm_mm_node_allocated(&vma->node))
+   if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
size += vma->node.size;
}
 
@@ -165,12 +165,11 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
-  i915_is_ggtt(vma->vm) ? "g" : "pp",
+  vma->is_ggtt ? "g" : "pp",
   vma->node.start, vma->node.size);
-   if (i915_is_ggtt(vma->vm))
-   seq_printf(m, ", type: %u)", vma->ggtt_view.type);
-   else
-   seq_puts(m, ")");
+   if (vma->is_ggtt)
+   seq_printf(m, ", type: %u", vma->ggtt_view.type);
+   seq_puts(m, ")");
}
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
@@ -346,7 +345,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 
bound++;
 
-   if (i915_is_ggtt(vma->vm)) {
+   if (vma->is_ggtt) {
stats->global += vma->node.size;
} else {
struct i915_hw_ppgtt *ppgtt
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c9c1a5cdc1e5..f840cc55f1ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2905,18 +2905,11 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object 
*obj);
 /* Some GGTT VM helpers */
 #define i915_obj_to_ggtt(obj) \
(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
-static inline bool i915_is_ggtt(struct i915_address_space *vm)
-{
-   struct i915_address_space *ggtt =
-   &((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
-   return vm == ggtt;
-}
 
 static inline struct i915_hw_ppgtt *
 i915_vm_to_ppgtt(struct i915_address_space *vm)
 {
WARN_ON(i915_is_ggtt(vm));
-
return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 44bd514a6c2e..9a22fdd8a9f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2595,8 +2595,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
return ret;
}
 
-   if (i915_is_ggtt(vma->vm) &&
-   vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+   if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
i915_gem_object_finish_gtt(obj);
 
/* release the fence reg _after_ flushing */
@@ -2611,7 +2610,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
vma->bound = 0;
 
list_del_init(&vma->vm_link);
-   if (i915_is_ggtt(vma->vm)) {
+   if (vma->is_ggtt) {
if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
obj->map_and_fenceable = false;
} else if (vma->ggtt_view.pages) {
@@ -3880,17 +3879,14 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct 
drm_i915_gem_object *obj,
 
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
-   struct i915_address_space *vm = NULL;
WARN_ON(vma->node.allocated);
 
/* Keep the vma as a placeholder in the execbuffer reservation lists */
if (!list_empty(&vma->exec_list))
return;
 
-   vm = vma->vm;
-
-   if (!i915_is_ggtt(vm))
-   i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+   if (!vma->is_ggtt)
+   i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
list_del(&vma->obj_link);
 
@@ -4446,7 +4442,7 @@ u64 i915_gem_obj_offset(struct dr

[Intel-gfx] [PATCH 069/190] drm/i915: Remove duplicate golden render state init from execlists

2016-01-11 Thread Chris Wilson
Now that we use the same vfuncs for emitting the batch buffer in both
execlists and legacy, the golden render state initialisation is
identical between both.
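
With a single emit_bb_start() vfunc, the shared path reduces to the
following sketch (condensed from i915_gem_render_state_init() below; the
aux batch dispatch and error unwinding are elided, and the wrapper name
is illustrative):

static int render_state_init_sketch(struct drm_i915_gem_request *req)
{
	struct render_state so;
	int ret;

	ret = render_state_prepare(req->engine, &so);
	if (ret || so.rodata == NULL)
		return ret;

	/* One dispatch path serves both legacy and execlists. */
	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
					 so.rodata->batch_items * 4,
					 I915_DISPATCH_SECURE);
	if (ret == 0)
		i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);

	render_state_fini(&so);
	return ret;
}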

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 22 --
 drivers/gpu/drm/i915/i915_gem_render_state.h | 18 ---
 drivers/gpu/drm/i915/intel_lrc.c | 34 +---
 drivers/gpu/drm/i915/intel_renderstate.h | 16 +
 4 files changed, 27 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index ccc988c2b226..222f25777bb4 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,6 +28,15 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
+struct render_state {
+   const struct intel_renderstate_rodata *rodata;
+   struct drm_i915_gem_object *obj;
+   u64 ggtt_offset;
+   int gen;
+   u32 aux_batch_size;
+   u32 aux_batch_offset;
+};
+
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(struct drm_device *dev, const int gen)
 {
@@ -163,14 +172,14 @@ err_out:
 
 #undef OUT_BATCH
 
-void i915_gem_render_state_fini(struct render_state *so)
+static void render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so->obj);
drm_gem_object_unreference(&so->obj->base);
 }
 
-int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
- struct render_state *so)
+static int render_state_prepare(struct intel_engine_cs *ring,
+   struct render_state *so)
 {
int ret;
 
@@ -186,7 +195,7 @@ int i915_gem_render_state_prepare(struct intel_engine_cs 
*ring,
 
ret = render_state_setup(so);
if (ret) {
-   i915_gem_render_state_fini(so);
+   render_state_fini(so);
return ret;
}
 
@@ -198,7 +207,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request 
*req)
struct render_state so;
int ret;
 
-   ret = i915_gem_render_state_prepare(req->engine, &so);
+   ret = render_state_prepare(req->engine, &so);
if (ret)
return ret;
 
@@ -222,8 +231,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request 
*req)
}
 
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-
 out:
-   i915_gem_render_state_fini(&so);
+   render_state_fini(&so);
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h 
b/drivers/gpu/drm/i915/i915_gem_render_state.h
index e641bb093a90..c44fca8599bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -26,24 +26,6 @@
 
 #include 
 
-struct intel_renderstate_rodata {
-   const u32 *reloc;
-   const u32 *batch;
-   const u32 batch_items;
-};
-
-struct render_state {
-   const struct intel_renderstate_rodata *rodata;
-   struct drm_i915_gem_object *obj;
-   u64 ggtt_offset;
-   int gen;
-   u32 aux_batch_size;
-   u32 aux_batch_offset;
-};
-
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
-void i915_gem_render_state_fini(struct render_state *so);
-int i915_gem_render_state_prepare(struct intel_engine_cs *ring,
- struct render_state *so);
 
 #endif /* _I915_GEM_RENDER_STATE_H_ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9838503fafca..2f92c43397eb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1627,38 +1627,6 @@ static int gen8_add_request(struct drm_i915_gem_request 
*request)
return 0;
 }
 
-static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
-{
-   struct render_state so;
-   int ret;
-
-   ret = i915_gem_render_state_prepare(req->engine, &so);
-   if (ret)
-   return ret;
-
-   if (so.rodata == NULL)
-   return 0;
-
-   ret = req->engine->emit_bb_start(req, so.ggtt_offset,
-so.rodata->batch_items * 4,
-I915_DISPATCH_SECURE);
-   if (ret)
-   goto out;
-
-   ret = req->engine->emit_bb_start(req,
-(so.ggtt_offset + so.aux_batch_offset),
-so.aux_batch_size,
-I915_DISPATCH_SECURE);
-   if (ret)
-   goto out;
-
-   i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-
-out:
-   i915_gem_render_state_fini(&so);
-   return ret;
-}
-
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 {
int ret;
@@ -1675,7 +1643,7 @@ static int gen8_init_rcs_context(struct 
drm_i915_gem_request *req)
if (ret)
   

[Intel-gfx] [PATCH 072/190] drm/i915: Execlists cannot pin a context without the object

2016-01-11 Thread Chris Wilson
Given that the intel_lr_context_pin cannot succeed without the object,
we cannot reach intel_lr_context_unpin() without first allocating that
object - so we can remove the redundant test.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 84a8bcc90d78..0f0bf97e4032 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -769,17 +769,14 @@ static int intel_lr_context_pin(struct 
drm_i915_gem_request *rq)
 void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 {
int engine = rq->engine->id;
-   struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[engine].state;
-   struct intel_ring *ring = rq->ring;
-
-   if (ctx_obj) {
-   WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex));
-   if (--rq->ctx->engine[engine].pin_count == 0) {
-   intel_ring_unmap(ring);
-   i915_gem_object_ggtt_unpin(ctx_obj);
-   i915_gem_context_unreference(rq->ctx);
-   }
-   }
+
+   WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex));
+   if (--rq->ctx->engine[engine].pin_count)
+   return;
+
+   intel_ring_unmap(rq->ring);
+   i915_gem_object_ggtt_unpin(rq->ctx->engine[engine].state);
+   i915_gem_context_unreference(rq->ctx);
 }
 
 static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request 
*req)
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 049/190] drm/i915: Disable waitboosting for mmioflips/semaphores

2016-01-11 Thread Chris Wilson
Since

commit a6f766f3975185af66a31a2cea2cd38721645999
Author: Chris Wilson 
Date:   Mon Apr 27 13:41:20 2015 +0100

drm/i915: Limit ring synchronisation (sw sempahores) RPS boosts

and

commit bcafc4e38b6ad03f48989b7ecaff03845b5b7acf
Author: Chris Wilson 
Date:   Mon Apr 27 13:41:21 2015 +0100

drm/i915: Limit mmio flip RPS boosts

we have limited the waitboosting for semaphores and flips. Ideally we do
not want to boost in either of these instances as no consumer is waiting
upon the results. With the introduction of NO_WAITBOOST in the previous
patch, we can finally disable these needless boosts.
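
Both call sites now simply pass the sentinel introduced by the previous
patch instead of a dedicated rps client (as in the hunks below; argument
names condensed):

	ret = __i915_wait_request(req, interruptible, NULL, NO_WAITBOOST);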

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 8 +---
 drivers/gpu/drm/i915/i915_drv.h  | 2 --
 drivers/gpu/drm/i915/i915_gem.c  | 2 +-
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 drivers/gpu/drm/i915/intel_pm.c  | 2 --
 5 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index b82482573a8f..5335072f2047 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2398,13 +2398,7 @@ static int i915_rps_boost_info(struct seq_file *m, void 
*data)
   list_empty(&file_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
-   seq_printf(m, "Semaphore boosts: %d%s\n",
-  dev_priv->rps.semaphores.boosts,
-  list_empty(&dev_priv->rps.semaphores.link) ? "" : ", 
active");
-   seq_printf(m, "MMIO flip boosts: %d%s\n",
-  dev_priv->rps.mmioflips.boosts,
-  list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
-   seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+   seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
spin_unlock(&dev_priv->rps.client_lock);
 
return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee146ce02412..49a151126b2a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1136,8 +1136,6 @@ struct intel_gen6_power_mgmt {
struct delayed_work delayed_resume_work;
unsigned boosts;
 
-   struct intel_rps_client semaphores, mmioflips;
-
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fd61e722b595..9df00e694cd9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2533,7 +2533,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
  NULL,
- &i915->rps.semaphores);
+ NO_WAITBOOST);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index ae247927e931..e2822530af25 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11430,7 +11430,7 @@ static void intel_mmio_flip_work_func(struct 
work_struct *work)
if (mmio_flip->req) {
WARN_ON(__i915_wait_request(mmio_flip->req,
false, NULL,
-   &mmio_flip->i915->rps.mmioflips));
+   NO_WAITBOOST));
i915_gem_request_put(mmio_flip->req);
}
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 39b7ca9c3e66..b340f2a1f110 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7324,8 +7324,6 @@ void intel_pm_setup(struct drm_device *dev)
INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
  intel_gen6_powersave_work);
INIT_LIST_HEAD(&dev_priv->rps.clients);
-   INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
-   INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
dev_priv->pm.suspended = false;
atomic_set(&dev_priv->pm.wakeref_count, 0);
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 003/190] drm/i915: Add an optional selection from i915 of CONFIG_MMU_NOTIFIER

2016-01-11 Thread Chris Wilson
userptr requires mmu-notifier for full unprivileged support. Most
systems have mmu-notifier support already enabled as a requirement for
virtualisation support, but we should make the option for i915 to take
advantage of mmu-notifiers explicit (and enable by default so that
regular userspace can take advantage of passing client memory to the
GPU.)

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/Kconfig | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index fcd77b27514d..b979295aab82 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -48,3 +48,14 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
  option changes the default for that module option.
 
  If in doubt, say "N".
+
+config DRM_I915_USERPTR
+   bool "Always enable userptr support"
+   depends on DRM_I915
+   select MMU_NOTIFIER
+   default y
+   help
+ This option selects CONFIG_MMU_NOTIFIER if it isn't already
+ selected to enable full userptr support.
+
+ If in doubt, say "Y".
-- 
2.7.0.rc3



[Intel-gfx] [PATCH 086/190] drm/i915: Mark the context and address space as closed

2016-01-11 Thread Chris Wilson
When the user closes the context mark it and the dependent address space
as closed. As we use an asynchronous destruct method, this has two purposes.
First it allows us to flag the closed context and detect internal errors if
we try to create any new objects for it (as it is removed from the user's
namespace, these should be internal bugs only). And secondly, it allows
us to immediately reap stale vma.
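
The close path in brief (lifted from the i915_gem_context.c hunk below):

static void context_close(struct intel_context *ctx)
{
	GEM_BUG_ON(ctx->closed);
	ctx->closed = true;
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base); /* closes every vma on the vm */
	i915_gem_context_unreference(ctx);
}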

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  3 +++
 drivers/gpu/drm/i915/i915_gem.c | 17 +++---
 drivers/gpu/drm/i915/i915_gem_context.c | 40 +
 drivers/gpu/drm/i915/i915_gem_gtt.c |  9 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  9 
 drivers/gpu/drm/i915/i915_gem_stolen.c  |  2 +-
 6 files changed, 65 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 262d1b247344..fc35a9b8d910 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -888,6 +888,8 @@ struct intel_context {
} engine[I915_NUM_RINGS];
 
struct list_head link;
+
+   bool closed:1;
 };
 
 enum fb_op_origin {
@@ -2707,6 +2709,7 @@ int __must_check i915_vma_unbind(struct i915_vma *vma);
  * _guarantee_ VMA in question is _not in use_ anywhere.
  */
 int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
+void i915_vma_close(struct i915_vma *vma);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f95cf39b7d2..16ee3bd7010e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2385,7 +2385,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object 
*obj)
}
 }
 
-static void i915_vma_close(struct i915_vma *vma)
+void i915_vma_close(struct i915_vma *vma)
 {
GEM_BUG_ON(vma->closed);
vma->closed = true;
@@ -2654,12 +2654,15 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
return ret;
}
 
-   trace_i915_vma_unbind(vma);
-
-   vma->vm->unbind_vma(vma);
+   if (likely(!vma->vm->closed)) {
+   trace_i915_vma_unbind(vma);
+   vma->vm->unbind_vma(vma);
+   }
vma->bound = 0;
 
-   list_del_init(&vma->vm_link);
+   drm_mm_remove_node(&vma->node);
+   list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
if (vma->is_ggtt) {
if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
obj->map_and_fenceable = false;
@@ -2670,8 +2673,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
vma->ggtt_view.pages = NULL;
}
 
-   drm_mm_remove_node(&vma->node);
-
/* Since the unbound list is global, only move to that list if
 * no more VMAs exist. */
if (--obj->bind_count == 0)
@@ -2917,7 +2918,7 @@ search_free:
goto err_remove_node;
 
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-   list_add_tail(&vma->vm_link, &vm->inactive_list);
+   list_move_tail(&vma->vm_link, &vm->inactive_list);
obj->bind_count++;
 
return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 310a770b7984..4583d8fe3585 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -153,6 +153,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
 
trace_i915_context_free(ctx);
+   GEM_BUG_ON(!ctx->closed);
 
if (i915.enable_execlists)
intel_lr_context_free(ctx);
@@ -209,6 +210,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t 
size)
return obj;
 }
 
+static void i915_ppgtt_close(struct i915_address_space *vm)
+{
+   struct list_head *phases[] = {
+   &vm->active_list,
+   &vm->inactive_list,
+   &vm->unbound_list,
+   NULL,
+   }, **phase;
+
+   GEM_BUG_ON(i915_is_ggtt(vm));
+   GEM_BUG_ON(vm->closed);
+   vm->closed = true;
+
+   for (phase = phases; *phase; phase++) {
+   struct i915_vma *vma, *vn;
+
+   list_for_each_entry_safe(vma, vn, *phase, vm_link)
+   if (!vma->closed)
+   i915_vma_close(vma);
+   }
+}
+
+static void context_close(struct intel_context *ctx)
+{
+   GEM_BUG_ON(ctx->closed);
+   ctx->closed = true;
+   if (ctx->ppgtt)
+   i915_ppgtt_close(&ctx->ppgtt->base);
+   i915_gem_context_unreference(ctx);
+}
+
 static struct intel_context *
 __create_hw_context(struct drm_device *dev,
struct drm_i915_file_private *file_priv)
@@ -256,7 +288,7 @@ __create_

[Intel-gfx] [PATCH 052/190] drm/i915: Treat ringbuffer writes as write to normal memory

2016-01-11 Thread Chris Wilson
Ringbuffers are now being written to either through LLC or WC paths, so
treating them as simply iomem is no longer adequate. However, for the
older !llc hardware, the hardware is documented as treating the TAIL
register update as serialising, so we can relax the barriers when filling
the rings (but even if it were not, it is still an uncached register write
and so serialising anyway).

For simplicity, let's ignore the iomem annotation.

v2: Remove iomem from ringbuffer->virtual_address
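
The resulting emitter is a plain store (from the intel_ringbuffer.h hunk
below), relying on the serialising TAIL register write on !llc parts:

static inline void intel_ringbuffer_emit(struct intel_ringbuffer *rb,
					 u32 data)
{
	*(uint32_t *)(rb->virtual_start + rb->tail) = data;
	rb->tail += 4;
}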

Signed-off-by: Chris Wilson 
Reviewed-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/intel_lrc.c|  7 +--
 drivers/gpu/drm/i915/intel_lrc.h|  6 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c |  7 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 19 +--
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 433e9f60e926..527eaf59be25 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -766,13 +766,8 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
 
 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
-   uint32_t __iomem *virt;
int rem = ringbuf->size - ringbuf->tail;
-
-   virt = ringbuf->virtual_start + ringbuf->tail;
-   rem /= 4;
-   while (rem--)
-   iowrite32(MI_NOOP, virt++);
+   memset(ringbuf->virtual_start + ringbuf->tail, 0, rem);
 
ringbuf->tail = 0;
intel_ring_update_space(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index de41ad6cd63d..1e58f2550777 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -71,8 +71,9 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request 
*req);
  */
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {
-   ringbuf->tail &= ringbuf->size - 1;
+   intel_ringbuffer_advance(ringbuf);
 }
+
 /**
  * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
  * @ringbuf: Ringbuffer to write to.
@@ -81,8 +82,7 @@ static inline void intel_logical_ring_advance(struct 
intel_ringbuffer *ringbuf)
 static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
   u32 data)
 {
-   iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-   ringbuf->tail += 4;
+   intel_ringbuffer_emit(ringbuf, data);
 }
 static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer 
*ringbuf,
   i915_reg_t reg)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2728c0ca0871..02b7032e16e0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2099,13 +2099,8 @@ static int ring_wait_for_space(struct intel_engine_cs 
*ring, int n)
 
 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
-   uint32_t __iomem *virt;
int rem = ringbuf->size - ringbuf->tail;
-
-   virt = ringbuf->virtual_start + ringbuf->tail;
-   rem /= 4;
-   while (rem--)
-   iowrite32(MI_NOOP, virt++);
+   memset(ringbuf->virtual_start + ringbuf->tail, 0, rem);
 
ringbuf->tail = 0;
intel_ring_update_space(ringbuf);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a1fcb6c7501f..7669a8d30f27 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -99,7 +99,7 @@ struct intel_ring_hangcheck {
 
 struct intel_ringbuffer {
struct drm_i915_gem_object *obj;
-   void __iomem *virtual_start;
+   void *virtual_start;
 
struct intel_engine_cs *ring;
struct list_head link;
@@ -468,12 +468,20 @@ int intel_ring_alloc_request_extras(struct 
drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+static inline void intel_ringbuffer_emit(struct intel_ringbuffer *rb,
+u32 data)
+{
+   *(uint32_t *)(rb->virtual_start + rb->tail) = data;
+   rb->tail += 4;
+}
+static inline void intel_ringbuffer_advance(struct intel_ringbuffer *rb)
+{
+   rb->tail &= rb->size - 1;
+}
 static inline void intel_ring_emit(struct intel_engine_cs *ring,
   u32 data)
 {
-   struct intel_ringbuffer *ringbuf = ring->buffer;
-   iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-   ringbuf->tail += 4;
+   intel_ringbuffer_emit(ring->buffer, data);
 }
 static inline void intel_ring_emit_reg(struct intel_engine_cs *ring,
   i915_reg_t reg)
@@ -482,8 +490,7 @@ static inline void intel_ring_emit_reg(struct 
intel_engine_cs *r

[Intel-gfx] [PATCH 085/190] drm/i915: Release vma when the handle is closed

2016-01-11 Thread Chris Wilson
In order to prevent a leak of the vma on shared objects, we need to
hook into the object_close callback to destroy the vma on the object for
this file. However, if we destroyed that vma immediately we may cause
unexpected application stalls as we try to unbind a busy vma - hence we
defer the unbind to when we retire the vma.

v2: Keep vma allocated until closed. This is useful for a later
optimisation, but it is required now in order to handle potential
recursion of i915_vma_unbind() by retiring itself.
v3: Comments are important.
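
Condensed, the deferral works like this (a sketch assembled from the
hunks below, not the patch verbatim):

	/* On handle close: detach the vma, but unbind only if idle. */
	vma->closed = true;
	list_del_init(&vma->obj_link);
	if (!vma->active)
		WARN_ON(i915_vma_unbind(vma));

	/* Later, once the vma retires and is unbound, a closed vma is
	 * finally destroyed: */
	if (unlikely(vma->closed))
		i915_vma_destroy(vma);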

Testcase: igt/gem_ppggtt/flink-and-close-vma-leak
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio
---
+static void i915_vma_close(struct i915_vma *vma)
+{
+   GEM_BUG_ON(vma->closed);
+   vma->closed = true;
+
+   list_del_init(&vma->obj_link);
+   if (!vma->active)
+   WARN_ON(i915_vma_unbind(vma));
+}
+
+void i915_gem_close_object(struct drm_gem_object *gem,
+  struct drm_file *file)
+{
+   struct drm_i915_gem_object *obj = to_intel_bo(gem);
+   struct drm_i915_file_private *fpriv = file->driver_priv;
+   struct i915_vma *vma, *vn;
+
+   mutex_lock(&obj->base.dev->struct_mutex);
+   list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
+   if (vma->vm->file == fpriv)
+   i915_vma_close(vma);
+   mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
 /**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
  * @DRM_IOCTL_ARGS: standard ioctl arguments
@@ -2571,31 +2595,56 @@ static void i915_gem_object_finish_gtt(struct 
drm_i915_gem_object *obj)
old_write_domain);
 }
 
+static void i915_vma_destroy(struct i915_vma *vma)
+{
+   GEM_BUG_ON(vma->node.allocated);
+   GEM_BUG_ON(vma->active);
+   GEM_BUG_ON(!vma->closed);
+
+   list_del(&vma->vm_link);
+   if (!vma->is_ggtt)
+   i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
+
+   kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
+}
+
 static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
struct drm_i915_gem_object *obj = vma->obj;
-   int ret;
+   int ret, i;
 
-   if (list_empty(&vma->obj_link))
-   return 0;
+   /* First wait upon any activity as retiring the request may
+* have side-effects such as unpinning or even unbinding this vma.
+*/
+   if (vma->active && wait) {
+   bool was_closed;
 
-   if (!drm_mm_node_allocated(&vma->node)) {
-   i915_gem_vma_destroy(vma);
-   return 0;
+   /* When a closed VMA is retired, it is unbound - eek. */
+   was_closed = vma->closed;
+   vma->closed = false;
+
+   for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) {
+   ret = i915_wait_request(vma->last_read[i].request);
+   if (ret)
+   break;
+   }
+
+   vma->closed = was_closed;
+   if (ret)
+   return ret;
+
+   GEM_BUG_ON(vma->active);
}
 
if (vma->pin_count)
return -EBUSY;
 
+   if (!drm_mm_node_allocated(&vma->node))
+   goto destroy;
+
GEM_BUG_ON(obj->bind_count == 0);
GEM_BUG_ON(obj->pages == NULL);
 
-   if (wait) {
-   ret = i915_gem_object_wait_rendering(obj, false);
-   if (ret)
-   return ret;
-   }
-
if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
i915_gem_object_finish_gtt(obj);
 
@@ -2622,7 +2671,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
}
 
drm_mm_remove_node(&vma->node);
-   i915_gem_vma_destroy(vma);
 
/* Since the unbound list is global, only move to that list if
 * no more VMAs exist. */
@@ -2636,6 +2684,10 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
 */
i915_gem_object_unpin_pages(obj);
 
+destroy:
+   if (unlikely(vma->closed))
+   i915_vma_destroy(vma);
+
return 0;
 }
 
@@ -2814,7 +2866,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
 
if (offset & (alignment - 1) || offset + size > end) {
ret = -EINVAL;
-   goto err_free_vma;
+   goto err_vma;
}
vma->node.start = offset;
vma->node.size = size;
@@ -2826,7 +2878,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
ret = drm_mm_reserve_node(&vm->mm, &vma->node);
}
if (ret)
-   goto err_free_vma;
+   goto err_vma;
} else {
if (flags & PIN_HIGH) {
search_flag = DRM_MM_SEARCH_BELOW;
@@ -2851,7 +2903,7 @@ search_free:
if (ret == 0)
 
