[Intel-gfx] i-g-t + GDB

2014-05-16 Thread Michael H Nguyen

Hi,

I am trying to step through i-g-t & libdrm source w/ GDB but single 
stepping seems erratic so I am guessing the build is optimized. Is 
changing "CFLAGS = -g -O2" to "CFLAGS = -g -O0" the right thing to do? 
If so, how can I do that globally one time versus touching every 
Makefile inside the igt and libdrm projects? Maybe pass an additional 
parameter to autogen.sh, I'm not sure ?


Thanks,
Mike


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] i-g-t + GDB

2014-05-16 Thread Michael H Nguyen


On 05/16/2014 04:29 PM, Michael H Nguyen wrote:

Hi,

I am trying to step through i-g-t & libdrm source w/ GDB but single 
stepping seems erratic so I am guessing the build is optimized. Is 
changing "CFLAGS = -g -O2" to "CFLAGS = -g -O0" the right thing to do? 
If so, how can I do that globally one time versus touching every 
Makefile inside the igt and libdrm projects? Maybe pass an additional 
parameter to autogen.sh, I'm not sure ?



Figured it out. For anyone interested...

$ ./autogen.sh --prefix= CFLAGS="-g -O0"

CFLAGS gets passed to ./configure which generates the make files w/ 
"CFLAGS = -g -O0"



Thanks,
Mike


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Add MI_SET_APPID cmd to cmd parser tables

2014-11-21 Thread michael . h . nguyen
From: "Michael H. Nguyen" 

Was missing

Issue: VIZ-4701
Signed-off-by: Michael H. Nguyen 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 14 +++---
 drivers/gpu/drm/i915/i915_reg.h|  3 +++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 22c992a..364aff7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -152,6 +152,7 @@ static const struct drm_i915_cmd_descriptor render_cmds[] = 
{
CMD(  MI_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_TOPOLOGY_FILTER,   SMI,F,  1,  S  ),
CMD(  MI_DISPLAY_FLIP,  SMI,   !F,  0xFF,   R  ),
+   CMD(  MI_SET_APPID, SMI,F,  1,  S  ),
CMD(  MI_SET_CONTEXT,   SMI,   !F,  0xFF,   R  ),
CMD(  MI_URB_CLEAR, SMI,   !F,  0xFF,   S  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0x3F,   B,
@@ -210,6 +211,7 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
CMD(  MI_SET_PREDICATE, SMI,F,  1,  S  ),
CMD(  MI_RS_CONTROL,SMI,F,  1,  S  ),
CMD(  MI_URB_ATOMIC_ALLOC,  SMI,F,  1,  S  ),
+   CMD(  MI_SET_APPID, SMI,F,  1,  S  ),
CMD(  MI_RS_CONTEXT,SMI,F,  1,  S  ),
CMD(  MI_LOAD_SCAN_LINES_INCL,  SMI,   !F,  0x3F,   M  ),
CMD(  MI_LOAD_SCAN_LINES_EXCL,  SMI,   !F,  0x3F,   R  ),
@@ -229,6 +231,7 @@ static const struct drm_i915_cmd_descriptor 
hsw_render_cmds[] = {
 
 static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
+   CMD(  MI_SET_APPID, SMI,F,  1,  S  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -272,6 +275,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
 
 static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
CMD(  MI_ARB_ON_OFF,SMI,F,  1,  R  ),
+   CMD(  MI_SET_APPID, SMI,F,  1,  S  ),
CMD(  MI_STORE_DWORD_IMM,   SMI,   !F,  0xFF,   B,
  .bits = {{
.offset = 0,
@@ -481,13 +485,17 @@ static u32 gen7_bsd_get_cmd_length_mask(u32 cmd_header)
u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
u32 subclient =
(cmd_header & INSTR_SUBCLIENT_MASK) >> INSTR_SUBCLIENT_SHIFT;
+   u32 op = (cmd_header & INSTR_26_TO_24_MASK) >> INSTR_26_TO_24_SHIFT;
 
if (client == INSTR_MI_CLIENT)
return 0x3F;
else if (client == INSTR_RC_CLIENT) {
-   if (subclient == INSTR_MEDIA_SUBCLIENT)
-   return 0xFFF;
-   else
+   if (subclient == INSTR_MEDIA_SUBCLIENT) {
+   if (op == 6)
+   return 0x;
+   else
+   return 0xFFF;
+   } else
return 0xFF;
}
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 7a77cd5..c881d88 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -204,6 +204,8 @@
 #define INSTR_SUBCLIENT_SHIFT   27
 #define INSTR_SUBCLIENT_MASK0x1800
 #define   INSTR_MEDIA_SUBCLIENT 0x2
+#define INSTR_26_TO_24_MASK0x700
+#define   INSTR_26_TO_24_SHIFT 24
 
 /*
  * Memory interface instructions used by the kernel
@@ -233,6 +235,7 @@
 #define MI_BATCH_BUFFER_ENDMI_INSTR(0x0a, 0)
 #define MI_SUSPEND_FLUSH   MI_INSTR(0x0b, 0)
 #define   MI_SUSPEND_FLUSH_EN  (1<<0)
+#define MI_SET_APPID   MI_INSTR(0x0e, 0)
 #define MI_OVERLAY_FLIPMI_INSTR(0x11, 0)
 #define   MI_OVERLAY_CONTINUE  (0x0<<21)
 #define   MI_OVERLAY_ON(0x1<<21)
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 7/7] drm/i915: Tidy up execbuffer command parsing code

2014-11-21 Thread Michael H. Nguyen

Hi Chris,



+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret)
+   goto err;


Pardon? This feels an implementation issue of i915_parse_cmds() and should
be resolved there. Presumably you are not actually reading back through
the GTT? That would be insane...


+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);


Yet pin+unpin around the parser seems to serve no other purpose.
Are you suggesting to remove the pin/unpin calls? If so, isn't pinning 
needed to ensure the backing store pages are available in vmap_batch()? 
i.e. obj->pages->sgl is populated w/ physical pages.


Or, are you suggesting to move the pin/unpin calls inside 
i915_parse_cmds() ?


Thx,
Mike

-Chris




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 1/7] drm/i915: Implement a framework for batch buffer pools

2014-11-21 Thread Michael H. Nguyen

Hi Daniel, Chris

On 11/12/2014 08:38 AM, Chris Wilson wrote:

On Wed, Nov 12, 2014 at 05:33:08PM +0100, Daniel Vetter wrote:

On Wed, Nov 12, 2014 at 10:46 AM, Chris Wilson  wrote:

On Wed, Nov 12, 2014 at 09:44:34AM +0100, Daniel Vetter wrote:

On Fri, Nov 07, 2014 at 02:22:01PM -0800, bradley.d.vol...@intel.com wrote:

+   if (obj && obj->madv == __I915_MADV_PURGED) {
+   was_purged = true;
+   list_del(&obj->batch_pool_list);
+   drm_gem_object_unreference(&obj->base);
+   obj = NULL;
+   }


Minor issue: We should move the purged check into the loop so that purge
buffer structs get released even when they're too small/big. Otherwise
we'll have a good chance to hang onto gobloads of structs forever.


I mentioned that we should do the purge of structs in our oom-notifier
as well to be safe.
I understand Daniel's suggestion to move the purge check into the loop 
(will do that) but I'm not familiar w/ the oom-notifier at all and so 
don't know how to do what Chris is asking w/ out ramping up. Is it not 
critical, follow up type work or is it absolutely necessary to have 
before merge? It sounded like the first.


Thx,
Mike


Given that we don't purge the sturcts for userspace purged objects (we
could just clear everything but the idr slot and mark it specially) I
don't think we need this here. At least not until we have this for
userspace bos since there's lots more of those I think.


Well discarding userspace purged objects requires a special zombie
handle, but these pure in-kernel structs we have complete control over
and so are much simpler to clean up.
-Chris


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 5/7] drm/i915: Use batch length instead of object size in command parser

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 48 --
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 69e51fa..489ff43 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -842,11 +842,19 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_start_offset,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
-   u32 *src_addr, *dest_addr = NULL;
+   u32 *src_base, *dest_base = NULL;
+   u32 *src_addr, *dest_addr;
+   u32 offset = batch_start_offset / sizeof(*dest_addr);
+   u32 end = batch_start_offset + batch_len;
+
+   if (end > dest_obj->base.size || end > src_obj->base.size)
+   return ERR_PTR(-E2BIG);
 
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
if (ret) {
@@ -854,15 +862,17 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
return ERR_PTR(ret);
}
 
-   src_addr = vmap_batch(src_obj);
-   if (!src_addr) {
+   src_base = vmap_batch(src_obj);
+   if (!src_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
ret = -ENOMEM;
goto unpin_src;
}
 
+   src_addr = src_base + offset;
+
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -870,24 +880,27 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   dest_addr = vmap_batch(dest_obj);
-   if (!dest_addr) {
+   dest_base = vmap_batch(dest_obj);
+   if (!dest_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
ret = -ENOMEM;
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj->base.size);
-   if (dest_obj->base.size > src_obj->base.size)
-   memset((u8 *)dest_addr + src_obj->base.size, 0,
-  dest_obj->base.size - src_obj->base.size);
+   dest_addr = dest_base + offset;
+
+   if (batch_start_offset != 0)
+   memset((u8 *)dest_base, 0, batch_start_offset);
+
+   memcpy(dest_addr, src_addr, batch_len);
+   memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
 
 unmap_src:
-   vunmap(src_addr);
+   vunmap(src_base);
 unpin_src:
i915_gem_object_unpin_pages(src_obj);
 
-   return ret ? ERR_PTR(ret) : dest_addr;
+   return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1008,6 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1020,6 +1034,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1027,7 +1042,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj,
+   batch_start_offset, b

[Intel-gfx] [PATCH v5 0/7] Command parser batch buffer copy

2014-11-26 Thread michael . h . nguyen
From: "Michael H. Nguyen" 

This is v5 of the series sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-November/055141.html

This version incorporates the following feedback from v4. 

- 0/7 Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the
  parser (danvet)
- 1/7 Move purged check inside the loop (danvet)
- 6/7 Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
  fnc (danvet)
- 7/7 Move pin/unpin calls inside i915_parse_cmds() (Chris W)

Issue: VIZ-4719
Brad Volkin (7):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Add a batch pool debugfs file
  drm/i915: Add batch pool details to i915_gem_objects debugfs
  drm/i915: Use batch length instead of object size in command parser
  drm/i915: Mark shadow batch buffers as purgeable
  drm/i915: Tidy up execbuffer command parsing code

 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c |  99 ---
 drivers/gpu/drm/i915/i915_debugfs.c|  86 ++--
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  24 +
 drivers/gpu/drm/i915/i915_gem.c|   3 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 154 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  99 +++
 9 files changed, 430 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 1/7] drm/i915: Implement a framework for batch buffer pools

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up, get to obtain a new buffer. Note that all buffers are
expected to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 Documentation/DocBook/drm.tmpl |   5 +
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|  15 +++
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152 +
 5 files changed, 174 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 944e8ed..b5943d4 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4028,6 +4028,11 @@ int num_ioctls;
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   
   
+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
 Logical Rings, Logical Ring Contexts and Execlists
 !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
 !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e4083e4..c8dbb37d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c4f2cb6..b12a062 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1154,6 +1154,12 @@ struct intel_l3_parity {
int which_slice;
 };
 
+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
 struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1885,6 +1891,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on
@@ -2849,6 +2857,13 @@ void i915_destroy_error_state(struct drm_device *dev);
 void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
+struct drm_i915_gem_object*
+i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
+
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(void);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 86cf428..b19dd06 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4433,6 +4433,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
+   INIT_LIST_HEAD(&obj->batch_pool_list);
 
obj->ops = ops;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
index 000..ccbe8f9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * 

[Intel-gfx] [PATCH v5 2/7] drm/i915: Use batch pools with the command parser

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser and dispatches the copied
(shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

v4:
- Fix read domains for shadow_batch_obj
- Remove the set_to_gtt_domain call from i915_parse_cmds
- ggtt_pin/unpin in the parser block to simplify error handling
- Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag
- Remove i915_gem_batch_pool_put calls

v5:
- Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after
  the parser (danvet, from v4 0/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 79 +++---
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  8 +++
 drivers/gpu/drm/i915/i915_gem.c|  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 +---
 5 files changed, 119 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 22c992a..69e51fa 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -840,6 +840,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj->base.size);
+   if (dest_obj->base.size > src_obj->base.size)
+   memset((u8 *)dest_addr + src_obj->base.size, 0,
+  dest_obj->base.size - src_obj->base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -956,6 +1006,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -967,32 +1018,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   r

[Intel-gfx] [PATCH v5 4/7] drm/i915: Add batch pool details to i915_gem_objects debugfs

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

To better account for the potentially large memory consumption
of the batch pool.

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 45 +
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index efdd59a..60d5ceb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -360,6 +360,38 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n", \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.active_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.inactive_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   print_file_stats(m, "batch pool", stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma->obj); \
@@ -442,6 +474,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -459,15 +494,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
 */
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
-   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu 
inactive, %zu global, %zu shared, %zu unbound)\n",
-  task ? task->comm : "",
-  stats.count,
-  stats.total,
-  stats.active,
-  stats.inactive,
-  stats.global,
-  stats.shared,
-  stats.unbound);
+   print_file_stats(m, task ? task->comm : "", stats);
rcu_read_unlock();
}
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 3/7] drm/i915: Add a batch pool debugfs file

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

It provides some useful information about the buffers in
the global command parser batch pool.

v2: rebase on global pool instead of per-ring pools

v3: rebase

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 41 +
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 319da61..efdd59a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -582,6 +582,46 @@ static int i915_gem_pageflip_info(struct seq_file *m, void 
*data)
return 0;
 }
 
+static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_gem_object *obj;
+   int count = 0;
+   int ret;
+
+   ret = mutex_lock_interruptible(&dev->struct_mutex);
+   if (ret)
+   return ret;
+
+   seq_puts(m, "active:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.active_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_puts(m, "inactive:\n");
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.inactive_list,
+   batch_pool_list) {
+   seq_puts(m, "   ");
+   describe_obj(m, obj);
+   seq_putc(m, '\n');
+   count++;
+   }
+
+   seq_printf(m, "total: %d\n", count);
+
+   mutex_unlock(&dev->struct_mutex);
+
+   return 0;
+}
+
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
struct drm_info_node *node = m->private;
@@ -4262,6 +4302,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS},
{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
+   {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 6/7] drm/i915: Mark shadow batch buffers as purgeable

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

By adding a new exec_entry flag, we cleanly mark the shadow objects
as purgeable after they are on the active list.

v2:
- Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
  fnc (danvet, from v4 6/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ccbe8f9..01333bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -146,6 +146,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_add_tail(&obj->batch_pool_list, &pool->inactive_list);
}
 
+   obj->madv = I915_MADV_WILLNEED;
+
list_move_tail(&obj->batch_pool_list, &pool->active_list);
 
return obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 76ae2da..07b952f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -223,7 +224,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;
 
-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1372,6 +1378,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
 
+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
if (ret)
goto err;
@@ -1395,6 +1403,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v5 7/7] drm/i915: Tidy up execbuffer command parsing code

2014-11-26 Thread michael . h . nguyen
From: Brad Volkin 

Move it to a separate function since the main do_execbuffer function
already has so much going on.

v2:
- Move pin/unpin calls inside i915_parse_cmds() (Chris W, v4 7/7
  feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |   8 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 127 -
 2 files changed, 78 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 489ff43..eecbfcb 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1042,10 +1042,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
+   return -1;
+   }
+
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
return PTR_ERR(batch_base);
}
 
@@ -1116,6 +1123,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
}
 
vunmap(batch_base);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 07b952f..bef9bcd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1026,6 +1026,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
 }
 
+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;
+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the command parser is
+* enabled.
+*
+* FIXME: with aliasing ppgtt, buffers that should only
+* be in ggtt still end up in the aliasing ppgtt. remove
+* this check when that is fixed.
+*/
+   if (USES_FULL_PPGTT(dev))
+   *flags |= I915_DISPATCH_SECURE;
+   }
+
+   return ret ? ERR_PTR(ret) : shadow_batch_obj;
+}
+
 int
 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
   struct intel_engine_cs *ring,
@@ -1242,7 +1302,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_private *dev_priv = dev->dev_private;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
-   struct drm_i915_gem_object *shadow_batch_obj = NULL;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1368,63 +1427,17 @@ i915_ge

Re: [Intel-gfx] [PATCH v5 0/7] Command parser batch buffer copy

2014-12-01 Thread Michael H. Nguyen



On 11/26/2014 11:44 PM, Chris Wilson wrote:

On Wed, Nov 26, 2014 at 01:53:34PM -0800, michael.h.ngu...@intel.com wrote:

From: "Michael H. Nguyen" 

This is v5 of the series sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-November/055141.html

This version incorporates the following feedback from v4.

- 0/7 Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the
   parser (danvet)
- 1/7 Move purged check inside the loop (danvet)
- 6/7 Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
   fnc (danvet)
- 7/7 Move pin/unpin calls inside i915_parse_cmds() (Chris W)

Issue: VIZ-4719
Brad Volkin (7):
   drm/i915: Implement a framework for batch buffer pools
   drm/i915: Use batch pools with the command parser
   drm/i915: Add a batch pool debugfs file
   drm/i915: Add batch pool details to i915_gem_objects debugfs
   drm/i915: Use batch length instead of object size in command parser
   drm/i915: Mark shadow batch buffers as purgeable
   drm/i915: Tidy up execbuffer command parsing code


This still does not incorporate the feedback from the last N cycles.

Chiefly: A single cache list, madvise on creation, and squash the
framework and debugging patches into one.


Re: single cache list

OK. Found the feedback in an old rev.

Re: madvise on creation

Were you referring to this?

from 
http://lists.freedesktop.org/archives/intel-gfx/2014-November/055060.htm


obj = i915_gem_obj_alloc();
i915_gem_object_get_pages(obj);
obj->madv = I915_MADV_DONTNEED;

If so, I don't understand . _get is returning obj and it'll be needed so 
would expect to set 'obj->madv = I915_MADV_WILLNEED' which is the case now.


For this feedback, would appreciate it if you could provide comments or 
proposed solutions in line w/ source. That would be helpful for me to 
get whatever context I'm missing.


Re: Squash

OK. I didn't see this feedback. Will do.

Thanks,
-Mike


-Chris


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 0/7] Command parser batch buffer copy

2014-12-02 Thread Michael H. Nguyen



On 12/02/2014 01:45 AM, Daniel Vetter wrote:

On Mon, Dec 01, 2014 at 02:39:51PM -0800, Michael H. Nguyen wrote:



On 11/26/2014 11:44 PM, Chris Wilson wrote:

On Wed, Nov 26, 2014 at 01:53:34PM -0800, michael.h.ngu...@intel.com wrote:

From: "Michael H. Nguyen" 

This is v5 of the series sent here:
http://lists.freedesktop.org/archives/intel-gfx/2014-November/055141.html

This version incorporates the following feedback from v4.

- 0/7 Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after the
   parser (danvet)
- 1/7 Move purged check inside the loop (danvet)
- 6/7 Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
   fnc (danvet)
- 7/7 Move pin/unpin calls inside i915_parse_cmds() (Chris W)

Issue: VIZ-4719
Brad Volkin (7):
   drm/i915: Implement a framework for batch buffer pools
   drm/i915: Use batch pools with the command parser
   drm/i915: Add a batch pool debugfs file
   drm/i915: Add batch pool details to i915_gem_objects debugfs
   drm/i915: Use batch length instead of object size in command parser
   drm/i915: Mark shadow batch buffers as purgeable
   drm/i915: Tidy up execbuffer command parsing code


This still does not incorporate the feedback from the last N cycles.

Chiefly: A single cache list, madvise on creation, and squash the
framework and debugging patches into one.


Re: single cache list

OK. Found the feedback in an old rev.


Btw in your patch you have a fancy retry loop looking at was_purged. On a
quick look that shouldn't be needed and a simple list-walk should be all
you need I think.


Right. That has been there for a couple of rev's. I have it addressed in 
the RFC sent to you and Chris privately. Let me know if what I sent 
isn't want you were hoping for.



-Daniel


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 0/7] Command parser batch buffer copy

2014-12-02 Thread Michael H. Nguyen


On 12/02/2014 03:13 AM, Chris Wilson wrote:
> On Mon, Dec 01, 2014 at 02:39:51PM -0800, Michael H. Nguyen wrote:
>> Re: madvise on creation
>>
>> Were you referring to this?
>>
>> from http://lists.freedesktop.org/archives/intel-gfx/2014-November/055060.htm
>>
>>  obj = i915_gem_obj_alloc();
>>  i915_gem_object_get_pages(obj);
>>  obj->madv = I915_MADV_DONTNEED;
>>
>> If so, I don't understand . _get is returning obj and it'll be
>> needed so would expect to set 'obj->madv = I915_MADV_WILLNEED' which
>> is the case now.
> 
> madv is only evaluated at get_pages(). Once you have the pages, you keep
> them until the shrinker purges them. Hence you only need to call
> get_pages() once and set obj->madv = DONTNEED afterwards, and then you
> only need to check whether the obj is purged before your next reuse (you
> do not need to touch madv ever again). Whilst the object is active it is
> a low priority target for the shrinker. That greatly simplifies the pool
> code.

I have a feeling this may make the driver less readable imo and could also 
require a re-write of the series. The current code may call get_pages() more 
than once and occurs outside of the batch_pool management fncs. Would have to 
re-write things to stop that.

i915_parse_cmds()
  copy_batch()
i915_gem_obj_prepare_shmem_read()
  i915_gem_object_get_pages()

After removing the fancy retry loop suggested by Daniel and moving to a single 
cache list, the implementation looks very simple imo. And, setting 'obj->madv = 
I915_MADV_WILLNEED;' looks right. Readability wise, you don't have to 
investigate further to understand the justification for that statement. Here is 
an RFC snippet...

i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
size_t size)
{  
struct drm_i915_gem_object *obj = NULL;
struct drm_i915_gem_object *tmp, *next;
   
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
   
list_for_each_entry_safe(tmp, next,
&pool->cache_list, batch_pool_list) {
   
if (tmp->active)
continue;
  
if (tmp->madv == __I915_MADV_PURGED) {  
list_del(&tmp->batch_pool_list);
drm_gem_object_unreference(&tmp->base); 
continue;
}
   
if (tmp->base.size >= size &&
tmp->base.size <= (2 * size)) {
obj = tmp;
break;
}
}

if (!obj) {
obj = i915_gem_alloc_object(pool->dev, size);
if (!obj)
return ERR_PTR(-ENOMEM);

list_add_tail(&obj->batch_pool_list, &pool->cache_list);
}

obj->madv = I915_MADV_WILLNEED;

return obj;
}

Given the spirit of your feedback was to simplify pool_get(), does this RFC do 
it for you? If not, I kindly request we have a sync up meeting to discuss your 
'obj->madv = DONTNEED' suggestion.

Thank you much,
-Mike

> -Chris
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v5 1/7] drm/i915: Implement a framework for batch buffer pools

2014-12-08 Thread Michael H. Nguyen



On 12/08/2014 07:19 AM, Bloomfield, Jon wrote:

-Original Message-
From: Nguyen, Michael H
Sent: Wednesday, November 26, 2014 9:54 PM
To: intel-gfx@lists.freedesktop.org
Cc: Bloomfield, Jon; Volkin, Bradley D
Subject: [PATCH v5 1/7] drm/i915: Implement a framework for batch buffer
pools

From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described in the
kerneldoc in the patch. The code is simple, but separating it out makes it
easier to change the underlying algorithms and to extend to future use cases
should they arise.

The interface is simple: init to create an empty pool, fini to clean it up, get 
to
obtain a new buffer. Note that all buffers are expected to be inactive before
cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool for the
command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
  Documentation/DocBook/drm.tmpl |   5 +
  drivers/gpu/drm/i915/Makefile  |   1 +
  drivers/gpu/drm/i915/i915_drv.h|  15 +++
  drivers/gpu/drm/i915/i915_gem.c|   1 +
  drivers/gpu/drm/i915/i915_gem_batch_pool.c | 152
+
  5 files changed, 174 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl
b/Documentation/DocBook/drm.tmpl index 944e8ed..b5943d4 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4028,6 +4028,11 @@ int num_ioctls;
!Idrivers/gpu/drm/i915/i915_cmd_parser.c


+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
  Logical Rings, Logical Ring Contexts and Execlists
!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and
Execlists  !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e4083e4..c8dbb37d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o

  # GEM code
  i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h index c4f2cb6..b12a062 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1154,6 +1154,12 @@ struct intel_l3_parity {
int which_slice;
  };

+struct i915_gem_batch_pool {
+   struct drm_device *dev;
+   struct list_head active_list;
+   struct list_head inactive_list;
+};
+
  struct i915_gem_mm {
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
@@ -1885,6 +1891,8 @@ struct drm_i915_gem_object {
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;

+   struct list_head batch_pool_list;
+
/**
 * This is set if the object is on the active lists (has pending
 * rendering and so a non-zero seqno), and is not set if it i s on @@ -
2849,6 +2857,13 @@ void i915_destroy_error_state(struct drm_device
*dev);  void i915_get_extra_instdone(struct drm_device *dev, uint32_t
*instdone);  const char *i915_cache_level_str(struct drm_i915_private *i915,
int type);

+/* i915_gem_batch_pool.c */
+void i915_gem_batch_pool_init(struct drm_device *dev,
+ struct i915_gem_batch_pool *pool); void
+i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); struct
+drm_i915_gem_object* i915_gem_batch_pool_get(struct
i915_gem_batch_pool
+*pool, size_t size);
+
  /* i915_cmd_parser.c */
  int i915_cmd_parser_get_version(void);
  int i915_cmd_parser_init_ring(struct intel_engine_cs *ring); diff --git
a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 86cf428..b19dd06 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4433,6 +4433,7 @@ void i915_gem_object_init(struct
drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->ring_list);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
+   INIT_LIST_HEAD(&obj->batch_pool_list);

obj->ops = ops;

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
new file mode 100644
i

[Intel-gfx] [PATCH v6 5/5] drm/i915: Tidy up execbuffer command parsing code

2014-12-08 Thread michael . h . nguyen
From: Brad Volkin 

Move it to a separate function since the main do_execbuffer function
already has so much going on.

v2:
- Move pin/unpin calls inside i915_parse_cmds() (Chris W, v4 7/7
  feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 

Conflicts:
drivers/gpu/drm/i915/i915_gem_execbuffer.c
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |   8 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 126 -
 2 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 118079d..80b1b28 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1042,10 +1042,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
+   return -1;
+   }
+
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
return PTR_ERR(batch_base);
}
 
@@ -1116,6 +1123,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
}
 
vunmap(batch_base);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2071938..3d36465 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1069,6 +1069,65 @@ i915_emit_box(struct intel_engine_cs *ring,
return 0;
 }
 
+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;
+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the command parser is
+* enabled.
+*
+* FIXME: with aliasing ppgtt, buffers that should only
+* be in ggtt still end up in the aliasing ppgtt. remove
+* this check when that is fixed.
+*/
+   if (USES_FULL_PPGTT(dev))
+   *flags |= I915_DISPATCH_SECURE;
+   }
+
+   return ret ? ERR_PTR(ret) : shadow_batch_obj;
+}
 
 int
 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
@@ -1286,7 +1345,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_private *dev_priv = dev->dev_private;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
-   struct drm_i915_gem_object *shadow_batch_obj = NULL;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1406,63 +1464,17 @@ i915_gem_do_e

[Intel-gfx] [PATCH v6 2/5] drm/i915: Use batch pools with the command parser

2014-12-08 Thread michael . h . nguyen
From: Brad Volkin 

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser and dispatches the copied
(shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

v4:
- Fix read domains for shadow_batch_obj
- Remove the set_to_gtt_domain call from i915_parse_cmds
- ggtt_pin/unpin in the parser block to simplify error handling
- Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag
- Remove i915_gem_batch_pool_put calls

v5:
- Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after
  the parser (danvet, from v4 0/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 79 +++---
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem.c|  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 +---
 5 files changed, 112 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 6e9eac4..ac5f5af 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -840,6 +840,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj->base.size);
+   if (dest_obj->base.size > src_obj->base.size)
+   memset((u8 *)dest_addr + src_obj->base.size, 0,
+  dest_obj->base.size - src_obj->base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -956,6 +1006,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -967,32 +1018,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret

[Intel-gfx] [PATCH v6 3/5] drm/i915: Use batch length instead of object size in command parser

2014-12-08 Thread michael . h . nguyen
From: Brad Volkin 

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 48 --
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index ac5f5af..118079d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -842,11 +842,19 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_start_offset,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
-   u32 *src_addr, *dest_addr = NULL;
+   u32 *src_base, *dest_base = NULL;
+   u32 *src_addr, *dest_addr;
+   u32 offset = batch_start_offset / sizeof(*dest_addr);
+   u32 end = batch_start_offset + batch_len;
+
+   if (end > dest_obj->base.size || end > src_obj->base.size)
+   return ERR_PTR(-E2BIG);
 
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
if (ret) {
@@ -854,15 +862,17 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
return ERR_PTR(ret);
}
 
-   src_addr = vmap_batch(src_obj);
-   if (!src_addr) {
+   src_base = vmap_batch(src_obj);
+   if (!src_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
ret = -ENOMEM;
goto unpin_src;
}
 
+   src_addr = src_base + offset;
+
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -870,24 +880,27 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   dest_addr = vmap_batch(dest_obj);
-   if (!dest_addr) {
+   dest_base = vmap_batch(dest_obj);
+   if (!dest_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
ret = -ENOMEM;
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj->base.size);
-   if (dest_obj->base.size > src_obj->base.size)
-   memset((u8 *)dest_addr + src_obj->base.size, 0,
-  dest_obj->base.size - src_obj->base.size);
+   dest_addr = dest_base + offset;
+
+   if (batch_start_offset != 0)
+   memset((u8 *)dest_base, 0, batch_start_offset);
+
+   memcpy(dest_addr, src_addr, batch_len);
+   memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
 
 unmap_src:
-   vunmap(src_addr);
+   vunmap(src_base);
 unpin_src:
i915_gem_object_unpin_pages(src_obj);
 
-   return ret ? ERR_PTR(ret) : dest_addr;
+   return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1008,6 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1020,6 +1034,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1027,7 +1042,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj,
+   batch_start_offset, b

[Intel-gfx] [PATCH v6 4/5] drm/i915: Mark shadow batch buffers as purgeable

2014-12-08 Thread michael . h . nguyen
From: Brad Volkin 

By adding a new exec_entry flag, we cleanly mark the shadow objects
as purgeable after they are on the active list.

v2:
- Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
  fnc (danvet, from v4 6/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index e9349e3..9e0ec4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -128,5 +128,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_add_tail(&obj->batch_pool_list, &pool->cache_list);
}
 
+   obj->madv = I915_MADV_WILLNEED;
+
return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fccfff5..2071938 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -226,7 +227,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;
 
-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1410,6 +1416,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
 
+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
if (ret)
goto err;
@@ -1433,6 +1441,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v6 1/5] drm/i915: Implement a framework for batch buffer pools

2014-12-08 Thread michael . h . nguyen
From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up, get to obtain a new buffer. Note that all buffers are
expected to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

v7:
- Use single list instead of two. (Chris W)
- s/active_list/cache_list
- Squashed in debug patches (Chris W)
  drm/i915: Add a batch pool debugfs file

  It provides some useful information about the buffers in
  the global command parser batch pool.

  v2: rebase on global pool instead of per-ring pools
  v3: rebase

  drm/i915: Add batch pool details to i915_gem_objects debugfs

  To better account for the potentially large memory consumption
  of the batch pool.

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c|  71 ++--
 drivers/gpu/drm/i915/i915_drv.h|  21 +
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 132 +
 6 files changed, 222 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 85287cb..022923a 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4029,6 +4029,11 @@ int num_ioctls;
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   
   
+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
 Logical Rings, Logical Ring Contexts and Execlists
 !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
 !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e4083e4..c8dbb37d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index d0e445e..3c3bf98 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,6 +359,33 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n", \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.cache_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   print_file_stats(m, "batch pool", stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma->obj); \
@@ -441,6 +468,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -458,15 +488,7 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data

[Intel-gfx] [PATCH v6 0/5] Command parser batch buffer copy

2014-12-08 Thread michael . h . nguyen
From: "Michael H. Nguyen" 

This is v6 in response to 
http://lists.freedesktop.org/archives/intel-gfx/2014-November/056314.html

- Simplified pool_get() code
Use single cache list to manage pool
s/active_list/cache_list
Use single loop instead of an inner & outer

- Squashed debugfs patches into 1/5

Brad Volkin (5):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Use batch length instead of object size in command parser
  drm/i915: Mark shadow batch buffers as purgeable
  drm/i915: Tidy up execbuffer command parsing code

 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c |  99 +
 drivers/gpu/drm/i915/i915_debugfs.c|  71 +--
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  23 +
 drivers/gpu/drm/i915/i915_gem.c|   3 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 134 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  98 +
 9 files changed, 393 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 1/5] drm/i915: Implement a framework for batch buffer pools

2014-12-09 Thread Michael H. Nguyen



On 12/09/2014 05:18 AM, Daniel Vetter wrote:

On Mon, Dec 08, 2014 at 02:33:46PM -0800, michael.h.ngu...@intel.com wrote:

From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up, get to obtain a new buffer. Note that all buffers are
expected to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

v7:
- Use single list instead of two. (Chris W)
- s/active_list/cache_list
- Squashed in debug patches (Chris W)
   drm/i915: Add a batch pool debugfs file

   It provides some useful information about the buffers in
   the global command parser batch pool.

   v2: rebase on global pool instead of per-ring pools
   v3: rebase

   drm/i915: Add batch pool details to i915_gem_objects debugfs

   To better account for the potentially large memory consumption
   of the batch pool.

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
  Documentation/DocBook/drm.tmpl |   5 ++
  drivers/gpu/drm/i915/Makefile  |   1 +
  drivers/gpu/drm/i915/i915_debugfs.c|  71 ++--
  drivers/gpu/drm/i915/i915_drv.h|  21 +
  drivers/gpu/drm/i915/i915_gem.c|   1 +
  drivers/gpu/drm/i915/i915_gem_batch_pool.c | 132 +
  6 files changed, 222 insertions(+), 9 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 85287cb..022923a 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4029,6 +4029,11 @@ int num_ioctls;
  !Idrivers/gpu/drm/i915/i915_cmd_parser.c


+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
  Logical Rings, Logical Ring Contexts and Execlists
  !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
  !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e4083e4..c8dbb37d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o

  # GEM code
  i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index d0e445e..3c3bf98 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,6 +359,33 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
  }

+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n", \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.cache_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   print_file_stats(m, "batch pool", stats);
+}
+
  #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma->obj); \
@@ -441,6 +468,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);

seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
st

Re: [Intel-gfx] [PATCH v6 4/5] drm/i915: Mark shadow batch buffers as purgeable

2014-12-09 Thread Michael H. Nguyen



On 12/09/2014 05:21 AM, Daniel Vetter wrote:

On Mon, Dec 08, 2014 at 02:33:49PM -0800, michael.h.ngu...@intel.com wrote:

From: Brad Volkin 

By adding a new exec_entry flag, we cleanly mark the shadow objects
as purgeable after they are on the active list.

v2:
- Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
   fnc (danvet, from v4 6/7 feedback)


Seems duplicated now.
Good eyes! The duplicate does go away in 5/5 as part of moving much of 
the cmdparser code out of do_execbuffer() but it still doesn't make 4/5 
right. Will fix.


Thanks,
Mike

-Daniel



Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
  drivers/gpu/drm/i915/i915_gem_batch_pool.c |  2 ++
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++-
  2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index e9349e3..9e0ec4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -128,5 +128,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_add_tail(&obj->batch_pool_list, &pool->cache_list);
}

+   obj->madv = I915_MADV_WILLNEED;
+
return obj;
  }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fccfff5..2071938 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
  #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
  #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)

  #define BATCH_OFFSET_BIAS (256*1024)

@@ -226,7 +227,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;

-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
  }

  static void eb_destroy(struct eb_vmas *eb)
@@ -1410,6 +1416,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}

+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+
ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
if (ret)
goto err;
@@ -1433,6 +1441,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,

vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);

--
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 5/5] drm/i915: Tidy up execbuffer command parsing code

2014-12-09 Thread Michael H. Nguyen



On 12/09/2014 05:22 AM, Daniel Vetter wrote:

On Mon, Dec 08, 2014 at 02:33:50PM -0800, michael.h.ngu...@intel.com wrote:

From: Brad Volkin 

Move it to a separate function since the main do_execbuffer function
already has so much going on.

v2:
- Move pin/unpin calls inside i915_parse_cmds() (Chris W, v4 7/7
   feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 

Conflicts:
drivers/gpu/drm/i915/i915_gem_execbuffer.c


Please remove those when rebasing. When actually something changed please
mention that in the patch revlog, e.g.

v3: Rebase (conflicts with patch "foo" in execbuf code).

But if it's a boring rebase without any functional conflicts I wouldn't
bother with a changelog entry.

Noted. In this case, its boring.

Thanks,
Mike

-Daniel


---
  drivers/gpu/drm/i915/i915_cmd_parser.c |   8 ++
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 126 -
  2 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 118079d..80b1b28 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1042,10 +1042,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */

+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
+   return -1;
+   }
+
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
return PTR_ERR(batch_base);
}

@@ -1116,6 +1123,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
}

vunmap(batch_base);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);

return ret;
  }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2071938..3d36465 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1069,6 +1069,65 @@ i915_emit_box(struct intel_engine_cs *ring,
return 0;
  }

+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;
+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the command parser is
+* enabled.
+*
+* FIXME: with aliasing ppgtt, buffers that should only
+* be in ggtt still end up in the aliasing ppgtt. remove
+* this check when that is fixed.
+*/
+   if (USES_FULL_PPGTT(dev))
+   *flags |= I915_DISPATCH_SECURE;
+   }
+
+   return ret ? ERR_PTR(ret) : shadow_batch_obj;
+}

  int
  i915_gem_ringbuffer_submission(struct drm_device *dev, struct d

Re: [Intel-gfx] [PATCH v6 1/5] drm/i915: Implement a framework for batch buffer pools

2014-12-10 Thread Michael H. Nguyen

Hi Jon,

On 12/10/2014 03:06 AM, Bloomfield, Jon wrote:

-Original Message-
From: Intel-gfx [mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf
Of Daniel Vetter
Sent: Tuesday, December 09, 2014 1:18 PM
To: Nguyen, Michael H
Cc: Brad Volkin; intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH v6 1/5] drm/i915: Implement a framework for
batch buffer pools

On Mon, Dec 08, 2014 at 02:33:46PM -0800, michael.h.ngu...@intel.com
wrote:

From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described in
the kerneldoc in the patch. The code is simple, but separating it out
makes it easier to change the underlying algorithms and to extend to
future use cases should they arise.

The interface is simple: init to create an empty pool, fini to clean
it up, get to obtain a new buffer. Note that all buffers are expected
to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool for
the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

v7:
- Use single list instead of two. (Chris W)
- s/active_list/cache_list
- Squashed in debug patches (Chris W)
   drm/i915: Add a batch pool debugfs file

   It provides some useful information about the buffers in
   the global command parser batch pool.

   v2: rebase on global pool instead of per-ring pools
   v3: rebase

   drm/i915: Add batch pool details to i915_gem_objects debugfs

   To better account for the potentially large memory consumption
   of the batch pool.

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
  Documentation/DocBook/drm.tmpl |   5 ++
  drivers/gpu/drm/i915/Makefile  |   1 +
  drivers/gpu/drm/i915/i915_debugfs.c|  71 ++--
  drivers/gpu/drm/i915/i915_drv.h|  21 +
  drivers/gpu/drm/i915/i915_gem.c|   1 +
  drivers/gpu/drm/i915/i915_gem_batch_pool.c | 132
+
  6 files changed, 222 insertions(+), 9 deletions(-)  create mode
100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl
b/Documentation/DocBook/drm.tmpl index 85287cb..022923a 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4029,6 +4029,11 @@ int num_ioctls;
!Idrivers/gpu/drm/i915/i915_cmd_parser.c


+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
  Logical Rings, Logical Ring Contexts and
Execlists  !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings,
Logical Ring Contexts and Execlists
!Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile
b/drivers/gpu/drm/i915/Makefile index e4083e4..c8dbb37d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o

  # GEM code
  i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
b/drivers/gpu/drm/i915/i915_debugfs.c
index d0e445e..3c3bf98 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,6 +359,33 @@ static int per_file_stats(int id, void *ptr, void

*data)

return 0;
  }

+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive,

%zu global, %zu shared, %zu unbound)\n", \

+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)


"print_file_stats" might be better named "print_obj_stats" or similar. As it 
stands
there is nothing in its name to suggest its purpose is to describe an object.
I guess the fnc name 'print_file_stats' was chosen b/c it takes a 
'stats' parameter of type 'struct file_stats' and prints the members. 
Seems fair to leave it I think as its generic.




+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv) {
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &de

Re: [Intel-gfx] [PATCH v6 1/5] drm/i915: Implement a framework for batch buffer pools

2014-12-10 Thread Michael H. Nguyen



On 12/10/2014 08:41 AM, Bloomfield, Jon wrote:

why do we take the batch_pool lock in i915_gem_batch_pool_info() ?
i915_gem_batch_pool_info() is a new debugfs entry while 
print_batch_pool_stats() is just an internal fnc, called from the 
debugfs entry i915_gem_object_info(). i915_gem_object_info() handles the 
locks.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 4/5] drm/i915: Mark shadow batch buffers as purgeable

2014-12-11 Thread Michael H. Nguyen



On 12/11/2014 05:26 AM, Bloomfield, Jon wrote:

-Original Message-
From: Nguyen, Michael H
Sent: Monday, December 08, 2014 10:34 PM
To: intel-gfx@lists.freedesktop.org
Cc: Bloomfield, Jon; Brad Volkin
Subject: [PATCH v6 4/5] drm/i915: Mark shadow batch buffers as purgeable

From: Brad Volkin 

By adding a new exec_entry flag, we cleanly mark the shadow objects as
purgeable after they are on the active list.

v2:
- Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
   fnc (danvet, from v4 6/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
  drivers/gpu/drm/i915/i915_gem_batch_pool.c |  2 ++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++-
  2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index e9349e3..9e0ec4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -128,5 +128,7 @@ i915_gem_batch_pool_get(struct
i915_gem_batch_pool *pool,
list_add_tail(&obj->batch_pool_list, &pool->cache_list);
}

+   obj->madv = I915_MADV_WILLNEED;
+
return obj;
  }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fccfff5..2071938 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)  #define
__EXEC_OBJECT_NEEDS_MAP (1<<29)  #define
__EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)

  #define BATCH_OFFSET_BIAS (256*1024)

@@ -226,7 +227,12 @@ i915_gem_execbuffer_unreserve_vma(struct
i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;

-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
__EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
  }

  static void eb_destroy(struct eb_vmas *eb) @@ -1410,6 +1416,8 @@
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}

+   shadow_batch_obj->madv = I915_MADV_WILLNEED;
+


Hasn't i915_gem_batch_pool_get() has already marked the buffer as WILLNEED ?
Yes, good eyes. It was actually corrected in 5/5 but that doesn't make 
this patch okay. Will fix.


Thanks,
Mike




ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
if (ret)
goto err;
@@ -1433,6 +1441,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
void *data,

vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags =
__EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj-

base);

list_add_tail(&vma->exec_list, &eb->vmas);

--
1.9.1



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v6 5/5] drm/i915: Tidy up execbuffer command parsing code

2014-12-11 Thread Michael H. Nguyen



On 12/11/2014 05:49 AM, Bloomfield, Jon wrote:




-Original Message-
From: Nguyen, Michael H
Sent: Monday, December 08, 2014 10:34 PM
To: intel-gfx@lists.freedesktop.org
Cc: Bloomfield, Jon; Brad Volkin
Subject: [PATCH v6 5/5] drm/i915: Tidy up execbuffer command parsing code

From: Brad Volkin 

Move it to a separate function since the main do_execbuffer function
already has so much going on.

v2:
- Move pin/unpin calls inside i915_parse_cmds() (Chris W, v4 7/7
   feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 

Conflicts:
drivers/gpu/drm/i915/i915_gem_execbuffer.c
---
  drivers/gpu/drm/i915/i915_cmd_parser.c |   8 ++
  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 126 ---
--
  2 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 118079d..80b1b28 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1042,10 +1042,17 @@ int i915_parse_cmds(struct intel_engine_cs
*ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See
check_cmd() */

+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
+   return -1;
+   }
+
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
return PTR_ERR(batch_base);
}

@@ -1116,6 +1123,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
}

vunmap(batch_base);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);

return ret;
  }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2071938..3d36465 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1069,6 +1069,65 @@ i915_emit_box(struct intel_engine_cs *ring,
return 0;
  }

+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2
*shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv-

mm.batch_pool,

+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;


Shouldn't this be returning an error code ?
Good question. -EACCESS tells the caller of i915_parse_cmds() of a 
special case. There are some comments in its fnc header and implementation.


-EACCESS indicates that batch_obj contains a chained batch in which case 
we dispatch batch_obj as non-secure. We return batch_obj here b/c the 
logic below doesn't apply. I believe the reason we safely dispatch 
chained batches as non-secure is b/c Brad and others probably determined 
that there weren't any valid use cases where chained batches needed to 
be dispatched as secure. Leaving them non-secure, HW will NOP bad cmds 
and its not necessary for i915 to do parsing.


Thanks,
Mike





+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0,
sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the
NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in
the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the c

[Intel-gfx] [PATCH v7 4/5] drm/i915: Mark shadow batch buffers as purgeable

2014-12-11 Thread michael . h . nguyen
From: Brad Volkin 

By adding a new exec_entry flag, we cleanly mark the shadow objects
as purgeable after they are on the active list.

v2:
- Move 'shadow_batch_obj->madv = I915_MADV_WILLNEED' inside _get
  fnc (danvet, from v4 6/7 feedback)

v3:
- Remove duplicate 'madv = I915_MADV_WILLNEED' (danvet, from v6 4/5)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 -
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 6016125..c690170 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -131,5 +131,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
/* Keep list in LRU order */
list_move_tail(&obj->batch_pool_list, &pool->cache_list);
 
+   obj->madv = I915_MADV_WILLNEED;
+
return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fccfff5..9af4053 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
+#define  __EXEC_OBJECT_PURGEABLE (1<<27)
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
@@ -226,7 +227,12 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
vma->pin_count--;
 
-   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
+   if (entry->flags & __EXEC_OBJECT_PURGEABLE)
+   obj->madv = I915_MADV_DONTNEED;
+
+   entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
+ __EXEC_OBJECT_HAS_PIN |
+ __EXEC_OBJECT_PURGEABLE);
 }
 
 static void eb_destroy(struct eb_vmas *eb)
@@ -1433,6 +1439,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = &shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
drm_gem_object_reference(&shadow_batch_obj->base);
list_add_tail(&vma->exec_list, &eb->vmas);
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v7 3/5] drm/i915: Use batch length instead of object size in command parser

2014-12-11 Thread michael . h . nguyen
From: Brad Volkin 

Previously we couldn't trust the user-supplied batch length because
it came directly from userspace (i.e. untrusted code). It would have
affected what commands software parsed without regard to what hardware
would actually execute, leaving a potential hole.

With the parser now copying the user supplied batch buffer and writing
MI_NOP commands to any space after the copied region, we can safely use
the batch length input. This should be a performance win as the actual
batch length is frequently much smaller than the allocated object size.

v2: Fix handling of non-zero batch_start_offset

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 48 --
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  1 +
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2a4ccac..a698b47 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -850,11 +850,19 @@ finish:
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
 static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
-  struct drm_i915_gem_object *src_obj)
+  struct drm_i915_gem_object *src_obj,
+  u32 batch_start_offset,
+  u32 batch_len)
 {
int ret = 0;
int needs_clflush = 0;
-   u32 *src_addr, *dest_addr = NULL;
+   u32 *src_base, *dest_base = NULL;
+   u32 *src_addr, *dest_addr;
+   u32 offset = batch_start_offset / sizeof(*dest_addr);
+   u32 end = batch_start_offset + batch_len;
+
+   if (end > dest_obj->base.size || end > src_obj->base.size)
+   return ERR_PTR(-E2BIG);
 
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
if (ret) {
@@ -862,15 +870,17 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
return ERR_PTR(ret);
}
 
-   src_addr = vmap_batch(src_obj);
-   if (!src_addr) {
+   src_base = vmap_batch(src_obj);
+   if (!src_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
ret = -ENOMEM;
goto unpin_src;
}
 
+   src_addr = src_base + offset;
+
if (needs_clflush)
-   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+   drm_clflush_virt_range((char *)src_addr, batch_len);
 
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
@@ -878,24 +888,27 @@ static u32 *copy_batch(struct drm_i915_gem_object 
*dest_obj,
goto unmap_src;
}
 
-   dest_addr = vmap_batch(dest_obj);
-   if (!dest_addr) {
+   dest_base = vmap_batch(dest_obj);
+   if (!dest_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
ret = -ENOMEM;
goto unmap_src;
}
 
-   memcpy(dest_addr, src_addr, src_obj->base.size);
-   if (dest_obj->base.size > src_obj->base.size)
-   memset((u8 *)dest_addr + src_obj->base.size, 0,
-  dest_obj->base.size - src_obj->base.size);
+   dest_addr = dest_base + offset;
+
+   if (batch_start_offset != 0)
+   memset((u8 *)dest_base, 0, batch_start_offset);
+
+   memcpy(dest_addr, src_addr, batch_len);
+   memset((u8 *)dest_addr + batch_len, 0, dest_obj->base.size - end);
 
 unmap_src:
-   vunmap(src_addr);
+   vunmap(src_base);
 unpin_src:
i915_gem_object_unpin_pages(src_obj);
 
-   return ret ? ERR_PTR(ret) : dest_addr;
+   return ret ? ERR_PTR(ret) : dest_base;
 }
 
 /**
@@ -1016,6 +1029,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @batch_obj: the batch buffer in question
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
+ * @batch_len: length of the commands in batch_obj
  * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
@@ -1028,6 +1042,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
+   u32 batch_len,
bool is_master)
 {
int ret = 0;
@@ -1035,7 +1050,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   batch_base = copy_batch(shadow_batch_obj, batch_obj);
+   batch_base = copy_batch(shadow_batch_obj, batch_obj,
+   batch_start_offset, b

[Intel-gfx] [PATCH v7 1/5] drm/i915: Implement a framework for batch buffer pools

2014-12-11 Thread michael . h . nguyen
From: Brad Volkin 

This adds a small module for managing a pool of batch buffers.
The only current use case is for the command parser, as described
in the kerneldoc in the patch. The code is simple, but separating
it out makes it easier to change the underlying algorithms and to
extend to future use cases should they arise.

The interface is simple: init to create an empty pool, fini to
clean it up, get to obtain a new buffer. Note that all buffers are
expected to be inactive before cleaning up the pool.

Locking is currently based on the caller holding the struct_mutex.
We already do that in the places where we will use the batch pool
for the command parser.

v2:
- s/BUG_ON/WARN_ON/ for locking assertions
- Remove the cap on pool size
- Switch from alloc/free to init/fini

v3:
- Idiomatic looping structure in _fini
- Correct handling of purged objects
- Don't return a buffer that's too much larger than needed

v4:
- Rebased to latest -nightly

v5:
- Remove _put() function and clean up comments to match

v6:
- Move purged check inside the loop (danvet, from v4 1/7 feedback)

v7:
- Use single list instead of two. (Chris W)
- s/active_list/cache_list
- Squashed in debug patches (Chris W)
  drm/i915: Add a batch pool debugfs file

  It provides some useful information about the buffers in
  the global command parser batch pool.

  v2: rebase on global pool instead of per-ring pools
  v3: rebase

  drm/i915: Add batch pool details to i915_gem_objects debugfs

  To better account for the potentially large memory consumption
  of the batch pool.

v8:
- Keep cache in LRU order (danvet, from v6 1/5 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_debugfs.c|  71 +--
 drivers/gpu/drm/i915/i915_drv.h|  21 +
 drivers/gpu/drm/i915/i915_gem.c|   1 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 135 +
 6 files changed, 225 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 25c23ca..21cbcdb 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4059,6 +4059,11 @@ int num_ioctls;
 !Idrivers/gpu/drm/i915/i915_cmd_parser.c
   
   
+Batchbuffer Pools
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+  
+  
 Logical Rings, Logical Ring Contexts and Execlists
 !Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and 
Execlists
 !Idrivers/gpu/drm/i915/intel_lrc.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3cf70a6..1849ffa 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -19,6 +19,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+ i915_gem_batch_pool.o \
  i915_gem_context.o \
  i915_gem_render_state.o \
  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 479e0c1..5d96d94 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,6 +359,33 @@ static int per_file_stats(int id, void *ptr, void *data)
return 0;
 }
 
+#define print_file_stats(m, name, stats) \
+   seq_printf(m, "%s: %u objects, %zu bytes (%zu active, %zu inactive, %zu 
global, %zu shared, %zu unbound)\n", \
+  name, \
+  stats.count, \
+  stats.total, \
+  stats.active, \
+  stats.inactive, \
+  stats.global, \
+  stats.shared, \
+  stats.unbound)
+
+static void print_batch_pool_stats(struct seq_file *m,
+  struct drm_i915_private *dev_priv)
+{
+   struct drm_i915_gem_object *obj;
+   struct file_stats stats;
+
+   memset(&stats, 0, sizeof(stats));
+
+   list_for_each_entry(obj,
+   &dev_priv->mm.batch_pool.cache_list,
+   batch_pool_list)
+   per_file_stats(0, obj, &stats);
+
+   print_file_stats(m, "batch pool", stats);
+}
+
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_ggtt_size(vma->obj); \
@@ -441,6 +468,9 @@ static int i915_gem_object_info(struct seq_file *m, void* 
data)
   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
seq_putc(m, '\n');
+   print_batch_pool_stats(m, dev_priv);
+
+   seq_putc(m, '\n');
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
@@ -458,15 +488,7 @@ sta

[Intel-gfx] [PATCH v7 5/5] drm/i915: Tidy up execbuffer command parsing code

2014-12-11 Thread michael . h . nguyen
From: Brad Volkin 

Move it to a separate function since the main do_execbuffer function
already has so much going on.

v2:
- Move pin/unpin calls inside i915_parse_cmds() (Chris W, v4 7/7
  feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |   8 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 124 -
 2 files changed, 77 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a698b47..0d757cd 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1050,10 +1050,17 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_cmd_descriptor default_desc = { 0 };
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
+   ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 4096, 0);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to pin shadow batch\n");
+   return -1;
+   }
+
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
return PTR_ERR(batch_base);
}
 
@@ -1124,6 +1131,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
}
 
vunmap(batch_base);
+   i915_gem_object_ggtt_unpin(shadow_batch_obj);
 
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9af4053..3d36465 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1069,6 +1069,65 @@ i915_emit_box(struct intel_engine_cs *ring,
return 0;
 }
 
+static struct drm_i915_gem_object*
+i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+ struct drm_i915_gem_exec_object2 *shadow_exec_entry,
+ struct eb_vmas *eb,
+ struct drm_i915_gem_object *batch_obj,
+ u32 batch_start_offset,
+ u32 batch_len,
+ bool is_master,
+ u32 *flags)
+{
+   struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
+   struct drm_i915_gem_object *shadow_batch_obj;
+   int ret;
+
+   shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
+  batch_obj->base.size);
+   if (IS_ERR(shadow_batch_obj))
+   return shadow_batch_obj;
+
+   ret = i915_parse_cmds(ring,
+ batch_obj,
+ shadow_batch_obj,
+ batch_start_offset,
+ batch_len,
+ is_master);
+   if (ret) {
+   if (ret == -EACCES)
+   return batch_obj;
+   } else {
+   struct i915_vma *vma;
+
+   memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
+
+   vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
+   vma->exec_entry = shadow_exec_entry;
+   vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE;
+   drm_gem_object_reference(&shadow_batch_obj->base);
+   list_add_tail(&vma->exec_list, &eb->vmas);
+
+   shadow_batch_obj->base.pending_read_domains =
+   batch_obj->base.pending_read_domains;
+
+   /*
+* Set the DISPATCH_SECURE bit to remove the NON_SECURE
+* bit from MI_BATCH_BUFFER_START commands issued in the
+* dispatch_execbuffer implementations. We specifically
+* don't want that set when the command parser is
+* enabled.
+*
+* FIXME: with aliasing ppgtt, buffers that should only
+* be in ggtt still end up in the aliasing ppgtt. remove
+* this check when that is fixed.
+*/
+   if (USES_FULL_PPGTT(dev))
+   *flags |= I915_DISPATCH_SECURE;
+   }
+
+   return ret ? ERR_PTR(ret) : shadow_batch_obj;
+}
 
 int
 i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
@@ -1286,7 +1345,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_private *dev_priv = dev->dev_private;
struct eb_vmas *eb;
struct drm_i915_gem_object *batch_obj;
-   struct drm_i915_gem_object *shadow_batch_obj = NULL;
struct drm_i915_gem_exec_object2 shadow_exec_entry;
struct intel_engine_cs *ring;
struct intel_context *ctx;
@@ -1406,62 +1464,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
}
 

[Intel-gfx] [PATCH v7 0/5] Command parser batch buffer copy

2014-12-11 Thread michael . h . nguyen
From: "Michael H. Nguyen" 

This is v7 in response to 
http://lists.freedesktop.org/archives/intel-gfx/2014-December/057033.html

Minor updates from the last rev
- Keep batch pool in LRU order (Daniel)
- Remove duplicate madv assignments (Daniel)

Brad Volkin (5):
  drm/i915: Implement a framework for batch buffer pools
  drm/i915: Use batch pools with the command parser
  drm/i915: Use batch length instead of object size in command parser
  drm/i915: Mark shadow batch buffers as purgeable
  drm/i915: Tidy up execbuffer command parsing code

 Documentation/DocBook/drm.tmpl |   5 ++
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_cmd_parser.c |  99 +
 drivers/gpu/drm/i915/i915_debugfs.c|  71 +--
 drivers/gpu/drm/i915/i915_dma.c|   1 +
 drivers/gpu/drm/i915/i915_drv.h|  23 +
 drivers/gpu/drm/i915/i915_gem.c|   3 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 137 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  98 +
 9 files changed, 396 insertions(+), 42 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_batch_pool.c

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v7 2/5] drm/i915: Use batch pools with the command parser

2014-12-11 Thread michael . h . nguyen
From: Brad Volkin 

This patch sets up all of the tracking and copying necessary to
use batch pools with the command parser and dispatches the copied
(shadow) batch to the hardware.

After this patch, the parser is in 'enabling' mode.

Note that performance takes a hit from the copy in some cases
and will likely need some work. At a rough pass, the memcpy
appears to be the bottleneck. Without having done a deeper
analysis, two ideas that come to mind are:
1) Copy sections of the batch at a time, as they are reached
   by parsing. Might improve cache locality.
2) Copy only up to the userspace-supplied batch length and
   memset the rest of the buffer. Reduces the number of reads.

v2:
- Remove setting the capacity of the pool
- One global pool instead of per-ring pools
- Replace batch_obj with shadow_batch_obj and hook into eb->vmas
- Memset any space in the shadow batch beyond what gets copied
- Rebased on execlist prep refactoring

v3:
- Rebase on chained batch handling
- Squash in setting the secure dispatch flag
- Add a note about the interaction w/secure dispatch pinning
- Check for request->batch_obj == NULL in i915_gem_free_request

v4:
- Fix read domains for shadow_batch_obj
- Remove the set_to_gtt_domain call from i915_parse_cmds
- ggtt_pin/unpin in the parser block to simplify error handling
- Check USES_FULL_PPGTT before setting DISPATCH_SECURE flag
- Remove i915_gem_batch_pool_put calls

v5:
- Move 'pending_read_domains |= I915_GEM_DOMAIN_COMMAND' after
  the parser (danvet, from v4 0/7 feedback)

Issue: VIZ-4719
Signed-off-by: Brad Volkin 
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 79 +++---
 drivers/gpu/drm/i915/i915_dma.c|  1 +
 drivers/gpu/drm/i915/i915_drv.h|  1 +
 drivers/gpu/drm/i915/i915_gem.c|  2 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 +---
 5 files changed, 112 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c 
b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b882bf2..2a4ccac 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -848,6 +848,56 @@ finish:
return (u32*)addr;
 }
 
+/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
+static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
+  struct drm_i915_gem_object *src_obj)
+{
+   int ret = 0;
+   int needs_clflush = 0;
+   u32 *src_addr, *dest_addr = NULL;
+
+   ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: failed to prep read\n");
+   return ERR_PTR(ret);
+   }
+
+   src_addr = vmap_batch(src_obj);
+   if (!src_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
+   ret = -ENOMEM;
+   goto unpin_src;
+   }
+
+   if (needs_clflush)
+   drm_clflush_virt_range((char *)src_addr, src_obj->base.size);
+
+   ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
+   if (ret) {
+   DRM_DEBUG_DRIVER("CMD: Failed to set batch CPU domain\n");
+   goto unmap_src;
+   }
+
+   dest_addr = vmap_batch(dest_obj);
+   if (!dest_addr) {
+   DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
+   ret = -ENOMEM;
+   goto unmap_src;
+   }
+
+   memcpy(dest_addr, src_addr, src_obj->base.size);
+   if (dest_obj->base.size > src_obj->base.size)
+   memset((u8 *)dest_addr + src_obj->base.size, 0,
+  dest_obj->base.size - src_obj->base.size);
+
+unmap_src:
+   vunmap(src_addr);
+unpin_src:
+   i915_gem_object_unpin_pages(src_obj);
+
+   return ret ? ERR_PTR(ret) : dest_addr;
+}
+
 /**
  * i915_needs_cmd_parser() - should a given ring use software command parsing?
  * @ring: the ring in question
@@ -964,6 +1014,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
+ * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @is_master: is the submitting process the drm master?
  *
@@ -975,32 +1026,28 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  */
 int i915_parse_cmds(struct intel_engine_cs *ring,
struct drm_i915_gem_object *batch_obj,
+   struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
bool is_master)
 {
int ret = 0;
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
-   int needs_clflush = 0;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 
-   ret

[Intel-gfx] i915 tracepoints - perf list shows none

2014-06-23 Thread Michael H Nguyen

Hi,

$ perf list

I see "net", "iommu" and other trace events listed however I do not see 
any i915 ones. Is there anything in particular I need to do at build or 
runtime to make them show up? i915 is loaded. I am using the perf tool 
built under ~/tools/perf.


Thanks,
Mike




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/bdw: Initialize workarounds in render ring init fn.

2014-08-15 Thread Michael H. Nguyen



On 08/14/2014 09:51 AM, arun.siluv...@linux.intel.com wrote:

From: Arun Siluvery 

The workarounds at the moment are initialized in init_clock_gating() but
they are lost during reset, suspend/resume; this patch moves workarounds
that are part of register state context to render ring init fn otherwise
default context ends up with incorrect values as they don't get initialized
until init_clock_gating fn. This should be ok as they are not related to
display clock gating in the first place.

v2: Add macro to generate w/a table (Daniel)

Signed-off-by: Arun Siluvery 
---
  drivers/gpu/drm/i915/i915_debugfs.c | 38 +++
  drivers/gpu/drm/i915/i915_drv.h | 32 
  drivers/gpu/drm/i915/intel_pm.c | 50 -
  drivers/gpu/drm/i915/intel_ringbuffer.c | 66 +
  4 files changed, 136 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e737b7..fc28835 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2406,6 +2406,43 @@ static int i915_shared_dplls_info(struct seq_file *m, 
void *unused)
return 0;
  }

+static int intel_wa_registers(struct seq_file *m, void *unused)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct drm_device *dev = node->minor->dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   int i;
+   int ret;
+
+   if (!dev_priv->num_wa) {
+   DRM_DEBUG_DRIVER("Workaround table not available\n");
+   return -EINVAL;
+   }
+
+   if (dev_priv->num_wa > I915_MAX_WA_REGS)
+   dev_priv->num_wa = I915_MAX_WA_REGS;
+
+   ret = mutex_lock_interruptible(&dev->struct_mutex);
+   if (ret)
+   return ret;
+
+   intel_runtime_pm_get(dev_priv);
+
+   seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa);
+   for (i = 0; i < dev_priv->num_wa; ++i) {
+   if (dev_priv->wa_regs[i].addr)
+   seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
+  dev_priv->wa_regs[i].addr,
+  I915_READ(dev_priv->wa_regs[i].addr),
+  dev_priv->wa_regs[i].mask);
+   }
+
+   intel_runtime_pm_put(dev_priv);
+   mutex_unlock(&dev->struct_mutex);
+
+   return 0;
+}
+
  struct pipe_crc_info {
const char *name;
struct drm_device *dev;
@@ -3936,6 +3973,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_semaphore_status", i915_semaphore_status, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0},
+   {"intel_wa_registers", intel_wa_registers, 0},
  };
  #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18c9ad8..c4190a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1222,6 +1222,24 @@ struct i915_gpu_error {
unsigned int test_irq_rings;
  };

+/*
+ * workarounds are currently applied at different places and changes
+ * are being done to consolidate them at a single place hence the
+ * exact count is not clear at this point; replace this with accurate
+ * number based on gen later.
+ */
+#define I915_MAX_WA_REGS  16
+
+/*
+ * structure to verify workarounds status
+ * mask: represent w/a bits
+ */
+struct intel_wa {
+   u32 addr;
+   u32 val;
+   u32 mask;
+};


Can we rename this to intel_reg_wa or similar? intel_wa confused me for 
a second. Personally when I think about wa's, they can be simple 
register writes like is the case here or more complex requiring some 
software logic.


Also, should this be defined in a userspace accessible header? I see 
this defined again locally in the IGT test.



+
  enum modeset_restore {
MODESET_ON_LID_OPEN,
MODESET_DONE,
@@ -1422,6 +1440,9 @@ struct drm_i915_private {
struct drm_i915_gem_object *semaphore_obj;
uint32_t last_seqno, next_seqno;

+   uint32_t num_wa;
+   struct intel_wa wa_regs[I915_MAX_WA_REGS];
+
drm_dma_handle_t *status_page_dmah;
struct resource mch_res;

@@ -2803,6 +2824,17 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, 
int val);
  #define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
  #define POSTING_READ16(reg)   (void)I915_READ16_NOTRACE(reg)

+#define I915_WRITE_WA(reg, val) ({ \
+   if (dev_priv->num_wa >= 0 \
+   && dev_priv->num_wa < I915_MAX_WA_REGS) { \
+   dev_priv->wa_regs[dev_priv->num_wa].addr = reg;   \
+   dev_priv->wa_regs[dev_priv->num_wa].mask  \
+   = (val) & 0x;