[Mesa-dev] [RFC PATCH 8/8] nv50: enable GL_AMD_performance_monitor

2015-06-22 Thread Samuel Pitoiset
This exposes a group of global performance counters that enables
GL_AMD_performance_monitor. All piglit tests are okay.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 35 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 +
 3 files changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 062d427..6638e82 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -1566,6 +1566,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
 
  info->name = cfg->event->name;
  info->query_type = NV50_HW_PM_QUERY(id);
+ info->group_id = NV50_HW_PM_QUERY_GROUP;
  info->max_value.u64 =
 (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0;
  return 1;
@@ -1576,6 +1577,40 @@ nv50_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
return 0;
 }
 
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+unsigned id,
+struct pipe_driver_query_group_info 
*info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   // TODO: Check DRM version when nvif will be merged in libdrm!
+   if (screen->base.perfmon) {
+  count++; /* NV50_HW_PM_QUERY_GROUP */
+   }
+
+   if (!info)
+  return count;
+
+   if (id == NV50_HW_PM_QUERY_GROUP) {
+  if (screen->base.perfmon) {
+ info->name = "Global performance counters";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ info->num_queries = NV50_HW_PM_QUERY_COUNT;
+ info->max_active_queries = 1; /* TODO: get rid of this limitation! */
+ return 1;
+  }
+   }
+
+   /* user asked for info about non-existing query group */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   info->type = 0;
+   return 0;
+}
+
 void
 nv50_init_query_functions(struct nv50_context *nv50)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index f07798e..dfe20c9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -746,6 +746,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+   pscreen->get_driver_query_group_info = 
nv50_screen_get_driver_query_group_info;
 
nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 69127c0..807ae0e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
 }
 
+/* Hardware global performance counters groups. */
+#define NV50_HW_PM_QUERY_GROUP 0
+
 /* Hardware global performance counters. */
 #define NV50_HW_PM_QUERY_COUNT  24
 #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i))
@@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen)
 int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
   struct pipe_driver_query_info *);
 
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+struct 
pipe_driver_query_group_info *);
+
 boolean nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions

2015-06-22 Thread Samuel Pitoiset
This may happen when nv50_query_create() fails to create a new query.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 55fcac8..1162110 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -96,6 +96,9 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
 static void
 nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
 {
+   if (!pq)
+  return;
+
nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
FREE(nv50_query(pq));
@@ -152,6 +155,9 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
 
+   if (!pq)
+  return FALSE;
+
/* For occlusion queries we have to change the storage, because a previous
 * query might set the initial render conition to FALSE even *after* we re-
 * initialized it to TRUE.
@@ -218,6 +224,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
 
+   if (!pq)
+  return;
+
q->state = NV50_QUERY_STATE_ENDED;
 
switch (q->type) {
@@ -294,9 +303,12 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
uint64_t *res64 = (uint64_t *)result;
uint32_t *res32 = (uint32_t *)result;
boolean *res8 = (boolean *)result;
-   uint64_t *data64 = (uint64_t *)q->data;
+   uint64_t *data64;
int i;
 
+   if (!pq)
+  return FALSE;
+
if (q->state != NV50_QUERY_STATE_READY)
   nv50_query_update(q);
 
@@ -314,6 +326,7 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
}
q->state = NV50_QUERY_STATE_READY;
 
+   data64 = (uint64_t *)q->data;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
   res8[0] = TRUE;
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 2/8] nv50: allocate a software object class

2015-06-22 Thread Samuel Pitoiset
This will allow monitoring global performance counters through the
command stream of the GPU instead of using ioctls.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_winsys.h |  1 +
 3 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6583a35..c985344 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
nouveau_object_del(&screen->sync);
+   nouveau_object_del(&screen->sw);
 
nouveau_screen_fini(&screen->base);
 
@@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->tesla->handle);
 
+   BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->sw->handle);
+
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
 
@@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev)
   goto fail;
}
 
+   ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,
+NULL, 0, &screen->sw);
+   if (ret) {
+  NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret);
+  goto fail;
+   }
+
ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
 NULL, 0, &screen->m2mf);
if (ret) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b..69fdfdb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -93,6 +93,7 @@ struct nv50_screen {
struct nouveau_object *tesla;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
+   struct nouveau_object *sw;
 };
 
 static INLINE struct nv50_screen *
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h 
b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index e8578c8..5cb33ef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo 
*bo, uint32_t flags)
 #define SUBC_COMPUTE(m) 6, (m)
 #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
 
+#define SUBC_SW(m) 7, (m)
 
 static INLINE uint32_t
 NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 3/8] nv50: allocate and map a notifier buffer object for PM

2015-06-22 Thread Samuel Pitoiset
This notifier buffer object will be used to read back global performance
counters results written by the kernel.

For each domain, we will store the handle of the perfdom object, an
array of 4 counters and the number of cycles. Like Gallium's HUD,
we keep a list of busy queries in a ring in order to prevent stalls
when reading queries.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 29 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 ++
 2 files changed, 35 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index c985344..3a99cc8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->m2mf);
nouveau_object_del(&screen->sync);
nouveau_object_del(&screen->sw);
+   nouveau_object_del(&screen->query);
 
nouveau_screen_fini(&screen->base);
 
@@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev)
struct nv50_screen *screen;
struct pipe_screen *pscreen;
struct nouveau_object *chan;
+   struct nv04_fifo *fifo;
uint64_t value;
uint32_t tesla_class;
unsigned stack_size;
+   uint32_t length;
int ret;
 
screen = CALLOC_STRUCT(nv50_screen);
@@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev)
screen->base.pushbuf->rsvd_kick = 5;
 
chan = screen->base.channel;
+   fifo = chan->data;
 
pscreen->destroy = nv50_screen_destroy;
pscreen->context_create = nv50_create;
@@ -772,6 +776,23 @@ nv50_screen_create(struct nouveau_device *dev)
   goto fail;
}
 
+   /* Compute size (in bytes) of the notifier buffer object which is used
+* in order to read back global performance counters results written
+* by the kernel. For each domain, we store the handle of the perfdom
+* object, an array of 4 counters and the number of cycles. Like for
+* the Gallium's HUD, we keep a list of busy queries in a ring in order
+* to prevent stalls when reading queries. */
+   length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) *
+  NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4;
+
+   ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS,
+&(struct nv04_notify){ .length = length },
+sizeof(struct nv04_notify), &screen->query);
+   if (ret) {
+   NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret);
+   goto fail;
+   }
+
ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,
 NULL, 0, &screen->sw);
if (ret) {
@@ -845,6 +866,14 @@ nv50_screen_create(struct nouveau_device *dev)
nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 
+   ret = nouveau_bo_wrap(screen->base.device, fifo->notify, 
&screen->notify_bo);
+   if (ret == 0)
+  nouveau_bo_map(screen->notify_bo, 0, screen->base.client);
+   if (ret) {
+  NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret);
+  goto fail;
+   }
+
nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
 
   screen->TPs = util_bitcount(value & 0x0000ffff);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 69fdfdb..71a5247 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -59,6 +59,7 @@ struct nv50_screen {
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *stack_bo;
struct nouveau_bo *tls_bo;
+   struct nouveau_bo *notify_bo;
 
unsigned TPs;
unsigned MPsInTP;
@@ -89,6 +90,7 @@ struct nv50_screen {
} fence;
 
struct nouveau_object *sync;
+   struct nouveau_object *query;
 
struct nouveau_object *tesla;
struct nouveau_object *eng2d;
@@ -96,6 +98,10 @@ struct nv50_screen {
struct nouveau_object *sw;
 };
 
+/* Parameters of the ring buffer used to read back global PM counters. */
+#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS 8
+#define NV50_HW_PM_RING_BUFFER_MAX_QUERIES 9 /* HUD_NUM_QUERIES + 1 */
+
 static INLINE struct nv50_screen *
 nv50_screen(struct pipe_screen *screen)
 {
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 7/8] nv50: expose global performance counters to the HUD

2015-06-22 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 41 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  3 ++
 3 files changed, 45 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index b9d2914..062d427 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -1535,6 +1535,47 @@ nv50_hw_pm_query_result(struct nv50_context *nv50, 
struct nv50_query *q,
return TRUE;
 }
 
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+  unsigned id,
+  struct pipe_driver_query_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   // TODO: Check DRM version when nvif will be merged in libdrm!
+   if (screen->base.perfmon) {
+  nv50_identify_events(screen);
+  count += NV50_HW_PM_QUERY_COUNT;
+   }
+
+   if (!info)
+  return count;
+
+   /* Init default values. */
+   info->name = "this_is_not_the_query_you_are_looking_for";
+   info->query_type = 0xdeadd01d;
+   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+   info->max_value.u64 = 0;
+   info->group_id = -1;
+
+   if (id < count) {
+  if (screen->base.perfmon) {
+ const struct nv50_hw_pm_query_cfg *cfg =
+nv50_hw_pm_query_get_cfg(screen, NV50_HW_PM_QUERY(id));
+
+ info->name = cfg->event->name;
+ info->query_type = NV50_HW_PM_QUERY(id);
+ info->max_value.u64 =
+(cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0;
+ return 1;
+  }
+   }
+
+   /* User asked for info about non-existing query. */
+   return 0;
+}
+
 void
 nv50_init_query_functions(struct nv50_context *nv50)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 53817c0..f07798e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -745,6 +745,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+   pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
 
nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 0449659..69127c0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -143,6 +143,9 @@ nv50_screen(struct pipe_screen *screen)
 #define NV50_HW_PM_QUERY_TEX_CACHE_HIT  22
 #define NV50_HW_PM_QUERY_TEX_WAITS_FOR_FB   23
 
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+  struct pipe_driver_query_info *);
+
 boolean nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters

2015-06-22 Thread Samuel Pitoiset
This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD doesn't fit very well. This will be improved later.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1057 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
 2 files changed, 1087 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1162110..b9d2914 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
 #include "nv50/nv50_context.h"
 #include "nv_object.xml.h"
 
+#include "nouveau_perfmon.h"
+
 #define NV50_QUERY_STATE_READY   0
 #define NV50_QUERY_STATE_ACTIVE  1
 #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
boolean is64bit;
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
 };
 
 #define NV50_QUERY_ALLOC_SPACE 256
 
+#ifdef DEBUG
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, void *);
+
 static INLINE struct nv50_query *
 nv50_query(struct pipe_query *pipe)
 {
@@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
 static void
 nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
 {
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
if (!pq)
   return;
 
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
 }
 
 static struct pipe_query *
@@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
   q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
}
 
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
return (struct pipe_query *)q;
 }
 
@@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;
 
if (!pq)
   return FALSE;
@@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
   nv50_query_get(push, q, 0x10, 0x5002);
   break;
default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ ret = nv50_hw_pm_query_begin(nv50, q);
+  }
   break;
}
q->state = NV50_QUERY_STATE_ACTIVE;
-   return true;
+   return ret;
 }
 
 static void
@@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
   q->state = NV50_QUERY_STATE_READY;
   break;
default:
-  assert(0);
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ nv50_hw_pm_query_end(nv50, q);
+  }
   break;
}
 
@@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
if (!pq)
   return FALSE;
 
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  return nv50_hw_pm_query_result(nv50, q, wait, result);
+   }
+
if (q->state != NV50_QUERY_STATE_READY)
   nv50_query_update(q);
 
@@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
nv50_query_end(pipe, targ->pq);
 }
 
+/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
+
+struct nv50_hw_pm_source_cfg
+{
+   const char *name;
+   uint64_t value;
+};
+
+struct nv50_hw_pm_signal_cfg
+{
+   const char *name;
+   const struct nv50_hw_pm_source_cfg src[8];
+};
+
+struct nv50_hw_pm_counter_cfg
+{
+   uint16_t logic_op;
+   const struct nv50_hw_pm_signal_cfg sig[4];
+};
+

[Mesa-dev] [RFC PATCH 0/8] nv50: expose global performance counters

2015-06-22 Thread Samuel Pitoiset
Hello there,

This series exposes NVIDIA's global performance counters for Tesla through the
Gallium's HUD and the GL_AMD_performance_monitor extension.

This adds support for 24 hardware events which have been reverse engineered
with PerfKit (Windows) and CUPTI (Linux). These hardware events will allow
developers to profile OpenGL applications.

To reduce latency and to improve accuracy, these global performance counters
are tied to the command stream of the GPU using a set of software methods
instead of ioctls. Results are then written by the kernel to a mapped notifier
buffer object that allows userspace to read them back.

However, the libdrm branch which implements the new nvif interface exposed by
Nouveau and the software methods interface are not upstream yet. I hope this
will be done in the next few days.

The code of this series can be found here:
http://cgit.freedesktop.org/~hakzsam/mesa/log/?h=nouveau_perfmon

The libdrm branch can be found here:
http://cgit.freedesktop.org/~hakzsam/drm/log/?h=nouveau_perfmon

The code of the software methods interface can be found here (last two commits):
http://cgit.freedesktop.org/~hakzsam/nouveau/log/?h=nouveau_perfmon

Another series which exposes global performance counters for Fermi and Kepler
will be submitted once I have got enough reviews for this one.

Feel free to review it.

Thanks,
Samuel.

Samuel Pitoiset (8):
  nouveau: implement the nvif hardware performance counters interface
  nv50: allocate a software object class
  nv50: allocate and map a notifier buffer object for PM
  nv50: configure the ring buffer for reading back PM counters
  nv50: prevent NULL pointer dereference with pipe_query functions
  nv50: add support for compute/graphics global performance counters
  nv50: expose global performance counters to the HUD
  nv50: enable GL_AMD_performance_monitor

 src/gallium/drivers/nouveau/Makefile.sources   |2 +
 src/gallium/drivers/nouveau/nouveau_perfmon.c  |  302 +++
 src/gallium/drivers/nouveau/nouveau_perfmon.h  |   59 ++
 src/gallium/drivers/nouveau/nouveau_screen.c   |5 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |1 +
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1148 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   49 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |   51 ++
 src/gallium/drivers/nouveau/nv50/nv50_winsys.h |1 +
 9 files changed, 1612 insertions(+), 6 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h

-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 4/8] nv50: configure the ring buffer for reading back PM counters

2015-06-22 Thread Samuel Pitoiset
To write data at the right offset, the kernel has to know some
parameters of this ring buffer, like the number of domains and the
maximum number of queries.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 3a99cc8..53817c0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -441,6 +441,13 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
 
BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->sw->handle);
+   BEGIN_NV04(push, SUBC_SW(0x0190), 1);
+   PUSH_DATA (push, screen->query->handle);
+   // XXX: Maybe add a check for DRM version here ?
+   BEGIN_NV04(push, SUBC_SW(0x0600), 1);
+   PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES);
+   BEGIN_NV04(push, SUBC_SW(0x0604), 1);
+   PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS);
 
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 1/8] nouveau: implement the nvif hardware performance counters interface

2015-06-22 Thread Samuel Pitoiset
This commit implements the base interface for hardware performance
counters that will be shared between nv50 and nvc0 drivers.

TODO: Bump the libdrm version required by mesa once nvif is merged.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 src/gallium/drivers/nouveau/nouveau_perfmon.c | 302 ++
 src/gallium/drivers/nouveau/nouveau_perfmon.h |  59 +
 src/gallium/drivers/nouveau/nouveau_screen.c  |   5 +
 src/gallium/drivers/nouveau/nouveau_screen.h  |   1 +
 5 files changed, 369 insertions(+)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 3fae3bc..3da0bdc 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -10,6 +10,8 @@ C_SOURCES := \
nouveau_heap.h \
nouveau_mm.c \
nouveau_mm.h \
+   nouveau_perfmon.c \
+   nouveau_perfmon.h \
nouveau_screen.c \
nouveau_screen.h \
nouveau_statebuf.h \
diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c 
b/src/gallium/drivers/nouveau/nouveau_perfmon.c
new file mode 100644
index 0000000..3798612
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+
+#include "util/u_memory.h"
+
+#include "nouveau_debug.h"
+#include "nouveau_winsys.h"
+#include "nouveau_perfmon.h"
+
+static int
+nouveau_perfmon_query_sources(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom,
+  struct nouveau_perfmon_sig *sig)
+{
+   struct nvif_perfmon_query_source_v0 args = {};
+
+   args.domain = dom->id;
+   args.signal = sig->signal;
+   do {
+   uint8_t prev_iter = args.iter;
+   struct nouveau_perfmon_src *src;
+   int ret;
+
+   ret = nouveau_object_mthd(pm->object, 
NVIF_PERFMON_V0_QUERY_SOURCE,
+   &args, sizeof(args));
+   if (ret)
+   return ret;
+
+   if (prev_iter) {
+   args.iter = prev_iter;
+   ret = nouveau_object_mthd(pm->object, 
NVIF_PERFMON_V0_QUERY_SOURCE,
+   &args, 
sizeof(args));
+   if (ret)
+   return ret;
+
+   src = CALLOC_STRUCT(nouveau_perfmon_src);
+   if (!src)
+   return -ENOMEM;
+
+#if 0
+   debug_printf("id   = %d\n", args.source);
+   debug_printf("name = %s\n", args.name);
+   debug_printf("mask = %08x\n", args.mask);
+   debug_printf("\n");
+#endif
+
+  src->id = args.source;
+ strncpy(src->name, args.name, sizeof(src->name));
+   list_addtail(&src->head, &sig->sources);
+   }
+   } while (args.iter != 0xff);
+
+   return 0;
+}
+
+static int
+nouveau_perfmon_query_signals(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom)
+{
+   struct nvif_perfmon_query_signal_v0 args = {};
+
+   args.domain = dom->id;
+   do {
+  uint16_t prev_iter = args.iter;
+  struct nouveau_perfmon_sig *sig;
+  int ret;
+
+  ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SIGNAL,
+&args, sizeof(args));
+  if (ret)
+   

Re: [Mesa-dev] [Nouveau] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions

2015-06-22 Thread Samuel Pitoiset



On 06/22/2015 10:52 PM, Ilia Mirkin wrote:

If query_create fails, why would any of these functions get called?


Because the HUD doesn't check if query_create() fails and it calls other 
pipe_query functions with NULL pointer instead of a valid query object.




On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
 wrote:

This may happen when nv50_query_create() fails to create a new query.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query.c | 15 ++-
  1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 55fcac8..1162110 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -96,6 +96,9 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
  static void
  nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
  {
+   if (!pq)
+  return;
+
 nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
 nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
 FREE(nv50_query(pq));
@@ -152,6 +155,9 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);

+   if (!pq)
+  return FALSE;
+
 /* For occlusion queries we have to change the storage, because a previous
  * query might set the initial render conition to FALSE even *after* we re-
  * initialized it to TRUE.
@@ -218,6 +224,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);

+   if (!pq)
+  return;
+
 q->state = NV50_QUERY_STATE_ENDED;

 switch (q->type) {
@@ -294,9 +303,12 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
 uint64_t *res64 = (uint64_t *)result;
 uint32_t *res32 = (uint32_t *)result;
 boolean *res8 = (boolean *)result;
-   uint64_t *data64 = (uint64_t *)q->data;
+   uint64_t *data64;
 int i;

+   if (!pq)
+  return FALSE;
+
 if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);

@@ -314,6 +326,7 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
 }
 q->state = NV50_QUERY_STATE_READY;

+   data64 = (uint64_t *)q->data;
 switch (q->type) {
 case PIPE_QUERY_GPU_FINISHED:
res8[0] = TRUE;
--
2.4.4

___
Nouveau mailing list
nouv...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions

2015-06-23 Thread Samuel Pitoiset



On 06/23/2015 08:57 AM, Michel Dänzer wrote:

On 23.06.2015 06:02, Samuel Pitoiset wrote:


On 06/22/2015 10:52 PM, Ilia Mirkin wrote:

If query_create fails, why would any of these functions get called?

Because the HUD doesn't check if query_create() fails and it calls other
pipe_query functions with NULL pointer instead of a valid query object.

Could the HUD code be fixed instead?
It's definitely possible, and probably a better solution than 
preventing NULL pointer dereferences in the underlying drivers. I'll make 
a patch.





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gallium/hud: prevent NULL pointer dereference with pipe_query functions

2015-06-24 Thread Samuel Pitoiset
The HUD doesn't check if query_create() fails and it calls other
pipe_query functions with NULL pointer instead of a valid query object.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 603aba7..ee71678 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -62,7 +62,8 @@ query_new_value(struct hud_graph *gr)
uint64_t now = os_time_get();
 
if (info->last_time) {
-  pipe->end_query(pipe, info->query[info->head]);
+  if (info->query[info->head])
+ pipe->end_query(pipe, info->query[info->head]);
 
   /* read query results */
   while (1) {
@@ -70,7 +71,7 @@ query_new_value(struct hud_graph *gr)
  union pipe_query_result result;
  uint64_t *res64 = (uint64_t *)&result;
 
- if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+ if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
 info->results_cumulative += res64[info->result_index];
 info->num_results++;
 
@@ -88,7 +89,8 @@ query_new_value(struct hud_graph *gr)
"gallium_hud: all queries are busy after %i frames, "
"can't add another query\n",
NUM_QUERIES);
-   pipe->destroy_query(pipe, info->query[info->head]);
+   if (info->query[info->head])
+  pipe->destroy_query(pipe, info->query[info->head]);
info->query[info->head] =
  pipe->create_query(pipe, info->query_type, 0);
 }
@@ -113,15 +115,15 @@ query_new_value(struct hud_graph *gr)
  info->results_cumulative = 0;
  info->num_results = 0;
   }
-
-  pipe->begin_query(pipe, info->query[info->head]);
}
else {
   /* initialize */
   info->last_time = now;
   info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
-  pipe->begin_query(pipe, info->query[info->head]);
}
+
+   if (info->query[info->head])
+  pipe->begin_query(pipe, info->query[info->head]);
 }
 
 static void
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/hud: prevent NULL pointer dereference with pipe_query functions

2015-06-25 Thread Samuel Pitoiset



On 06/25/2015 02:36 PM, Marek Olšák wrote:

What's the point of drawing a HUD pane if a query cannot be created?


With my series which adds support for global performance counters on 
NV50, query_create() may fail if we want to monitor *two* different 
query types with the HUD.
This limitation is due to how the HUD uses the pipe_query interface and 
this doesn't fit well with the underlying interface exposed by Nouveau.


In other words, with two different query types the scenario is as follows:
CREATE Q1, BEGIN Q1, CREATE Q2, BEGIN Q2, END Q1, RESULT Q1, BEGIN Q1, 
END Q2, RESULT Q2, BEGIN Q2, END Q1, and so on.


But, with nv50/nvc0 drivers I need to schedule hardware counters at 
query creation and this is going to be pretty hard without a really 
weird workaround.
Hence, only one query type can be monitored simultaneously, and 
query_create() fails.


A better scenario for nouveau drivers would be:
CREATE Q1, CREATE Q2, BEGIN Q1, BEGIN Q2, END Q1, END Q2, RESULT Q1, 
RESULT Q2, BEGIN Q1, and so on.
This would allow us to introduce, for example, begin_all_queries() and 
end_all_queries() in order to create/begin/end all queries in one shot 
*only*.


My plan is to change this behaviour, but it will require a lot of changes 
in the HUD, mainly because queries are collected by pane.



Can we detect this during initialization?


I'm not sure if we can detect this at initialization and if this is 
going to be easy to do.
But, how can we handle the case where a driver will only fail one time 
to create a query? Do we need to remove the pane? Not sure.
This is going to be hard to say, especially because nouveau drivers 
could fail if no hardware counters are available.




Marek

On Wed, Jun 24, 2015 at 9:26 PM, Samuel Pitoiset
 wrote:

The HUD doesn't check if query_create() fails and it calls other
pipe_query functions with NULL pointer instead of a valid query object.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/auxiliary/hud/hud_driver_query.c | 14 --
  1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 603aba7..ee71678 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -62,7 +62,8 @@ query_new_value(struct hud_graph *gr)
 uint64_t now = os_time_get();

 if (info->last_time) {
-  pipe->end_query(pipe, info->query[info->head]);
+  if (info->query[info->head])
+ pipe->end_query(pipe, info->query[info->head]);

/* read query results */
while (1) {
@@ -70,7 +71,7 @@ query_new_value(struct hud_graph *gr)
   union pipe_query_result result;
   uint64_t *res64 = (uint64_t *)&result;

- if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+ if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
  info->results_cumulative += res64[info->result_index];
  info->num_results++;

@@ -88,7 +89,8 @@ query_new_value(struct hud_graph *gr)
 "gallium_hud: all queries are busy after %i frames, "
 "can't add another query\n",
 NUM_QUERIES);
-   pipe->destroy_query(pipe, info->query[info->head]);
+   if (info->query[info->head])
+  pipe->destroy_query(pipe, info->query[info->head]);
 info->query[info->head] =
   pipe->create_query(pipe, info->query_type, 0);
  }
@@ -113,15 +115,15 @@ query_new_value(struct hud_graph *gr)
   info->results_cumulative = 0;
   info->num_results = 0;
}
-
-  pipe->begin_query(pipe, info->query[info->head]);
 }
 else {
/* initialize */
info->last_time = now;
info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
-  pipe->begin_query(pipe, info->query[info->head]);
 }
+
+   if (info->query[info->head])
+  pipe->begin_query(pipe, info->query[info->head]);
  }

  static void
--
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [RFC PATCH 3/8] nv50: allocate and map a notifier buffer object for PM

2015-06-28 Thread Samuel Pitoiset



On 06/26/2015 01:02 AM, Ilia Mirkin wrote:

On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
 wrote:

This notifier buffer object will be used to read back global performance
counters results written by the kernel.

For each domain, we will store the handle of the perfdom object, an
array of 4 counters and the number of cycles. Like the Gallium's HUD,
we keep a list of busy queries in a ring in order to prevent stalls
when reading queries.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 29 ++
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 ++
  2 files changed, 35 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index c985344..3a99cc8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 nouveau_object_del(&screen->m2mf);
 nouveau_object_del(&screen->sync);
 nouveau_object_del(&screen->sw);
+   nouveau_object_del(&screen->query);

 nouveau_screen_fini(&screen->base);

@@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev)
 struct nv50_screen *screen;
 struct pipe_screen *pscreen;
 struct nouveau_object *chan;
+   struct nv04_fifo *fifo;
 uint64_t value;
 uint32_t tesla_class;
 unsigned stack_size;
+   uint32_t length;
 int ret;

 screen = CALLOC_STRUCT(nv50_screen);
@@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev)
 screen->base.pushbuf->rsvd_kick = 5;

 chan = screen->base.channel;
+   fifo = chan->data;

 pscreen->destroy = nv50_screen_destroy;
 pscreen->context_create = nv50_create;
@@ -772,6 +776,23 @@ nv50_screen_create(struct nouveau_device *dev)
goto fail;
 }

+   /* Compute size (in bytes) of the notifier buffer object which is used
+* in order to read back global performance counters results written
+* by the kernel. For each domain, we store the handle of the perfdom
+* object, an array of 4 counters and the number of cycles. Like for
+* the Gallium's HUD, we keep a list of busy queries in a ring in order
+* to prevent stalls when reading queries. */
+   length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) *
+  NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4;

This calculation may become apparent to me later, but it certainly
isn't now. What's the *6? You refer to an array of 4 counters...
should that have been 6 counters? Or should this have been a 4?


This refers to the handle of the object, the array of 4 counters and the 
number of cycles.

In other words, for each domain we store: id, ctr0, ctr1, ctr2, ctr3, clk.




+
+   ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS,
+&(struct nv04_notify){ .length = length },
+sizeof(struct nv04_notify), &screen->query);
+   if (ret) {
+   NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret);
+   goto fail;
+   }
+
 ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,
  NULL, 0, &screen->sw);
 if (ret) {
@@ -845,6 +866,14 @@ nv50_screen_create(struct nouveau_device *dev)
 nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);

+   ret = nouveau_bo_wrap(screen->base.device, fifo->notify, 
&screen->notify_bo);
+   if (ret == 0)
+  nouveau_bo_map(screen->notify_bo, 0, screen->base.client);

ret = ...


Good catch, thanks.




+   if (ret) {
+  NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret);
+  goto fail;
+   }
+
 nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);

 screen->TPs = util_bitcount(value & 0x0000ffff);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 69fdfdb..71a5247 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -59,6 +59,7 @@ struct nv50_screen {
 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
 struct nouveau_bo *stack_bo;
 struct nouveau_bo *tls_bo;
+   struct nouveau_bo *notify_bo;

 unsigned TPs;
 unsigned MPsInTP;
@@ -89,6 +90,7 @@ struct nv50_screen {
 } fence;

 struct nouveau_object *sync;
+   struct nouveau_object *query;

 struct nouveau_object *tesla;
 struct nouveau_object *eng2d;
@@ -96,6 +98,10 @@ struct nv50_screen {
 struct nouveau_object *sw;
  };

+/* Parameters of the ring buffer used to read back global PM counters. */
+#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS 

Re: [Mesa-dev] [Nouveau] [RFC PATCH 4/8] nv50: configure the ring buffer for reading back PM counters

2015-06-28 Thread Samuel Pitoiset



On 06/26/2015 01:04 AM, Ilia Mirkin wrote:

Yeah, this whole thing has to be guarded by a drm version check,
otherwise it'll end up with errors in dmesg I assume. Perhaps only
allocate screen->query when the drm version matches, and gate things
on that for the rest of the code?


Yes, this sounds good to me.



On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
 wrote:

To write data at the right offset, the kernel has to know some
parameters of this ring buffer, like the number of domains and the
maximum number of queries.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 7 +++
  1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 3a99cc8..53817c0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -441,6 +441,13 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)

 BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
 PUSH_DATA (push, screen->sw->handle);
+   BEGIN_NV04(push, SUBC_SW(0x0190), 1);
+   PUSH_DATA (push, screen->query->handle);
+   // XXX: Maybe add a check for DRM version here ?
+   BEGIN_NV04(push, SUBC_SW(0x0600), 1);
+   PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES);
+   BEGIN_NV04(push, SUBC_SW(0x0604), 1);
+   PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS);

FYI you can do BEGIN_NV04(..., 2), since they're sequential.


I'm going to make the change.




 BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
 PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
--
2.4.4

___
Nouveau mailing list
nouv...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters

2015-06-28 Thread Samuel Pitoiset



On 06/26/2015 01:09 AM, Ilia Mirkin wrote:

What's with the \%'s everywhere?


Maybe "percent" would be better?



On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset
 wrote:

This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD is not a good fit for this. This will be improved later.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1057 +++-
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
  2 files changed, 1087 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1162110..b9d2914 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
  #include "nv50/nv50_context.h"
  #include "nv_object.xml.h"

+#include "nouveau_perfmon.h"
+
  #define NV50_QUERY_STATE_READY   0
  #define NV50_QUERY_STATE_ACTIVE  1
  #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
 boolean is64bit;
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
  };

  #define NV50_QUERY_ALLOC_SPACE 256

+#ifdef DEBUG
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, void *);
+
  static INLINE struct nv50_query *
  nv50_query(struct pipe_query *pipe)
  {
@@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
  static void
  nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
  {
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
 if (!pq)
return;

-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
  }

  static struct pipe_query *
@@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
 }

+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
 return (struct pipe_query *)q;
  }

@@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
 struct nv50_context *nv50 = nv50_context(pipe);
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;

 if (!pq)
return FALSE;
@@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
nv50_query_get(push, q, 0x10, 0x5002);
break;
 default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ ret = nv50_hw_pm_query_begin(nv50, q);
+  }
break;
 }
 q->state = NV50_QUERY_STATE_ACTIVE;
-   return true;
+   return ret;
  }

  static void
@@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
q->state = NV50_QUERY_STATE_READY;
break;
 default:
-  assert(0);
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ nv50_hw_pm_query_end(nv50, q);
+  }
break;
 }

@@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
 if (!pq)
return FALSE;

+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  return nv50_hw_pm_query_result(nv50, q, wait, result);
+   }
+
 if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);

@@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
 nv50_query_end(pipe, targ->pq);
  }

+/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
+
+struct nv50_hw_pm_source_cfg
+{
+   const char *name;
+   uint64_t

[Mesa-dev] [PATCH v2 0/7] nv50: expose global performance counters

2015-06-30 Thread Samuel Pitoiset
Hello there,

This series exposes NVIDIA's global performance counters for Tesla through the
Gallium's HUD and the GL_AMD_performance_monitor extension.

This adds support for 24 hardware events which have been reverse engineered
with PerfKit (Windows) and CUPTI (Linux). These hardware events will allow
developers to profile OpenGL applications.

To reduce latency and to improve accuracy, these global performance counters
are tied to the command stream of the GPU using a set of software methods
instead of ioctls. Results are then written by the kernel to a mapped notifier
buffer object that allows userspace to read them back.

However, the libdrm branch which implements the new nvif interface exposed by
Nouveau and the software methods interface are not upstream yet. I hope this
will be done in the next few days.

The code of this series can be found here:
http://cgit.freedesktop.org/~hakzsam/mesa/log/?h=nouveau_perfmon

The libdrm branch can be found here:
http://cgit.freedesktop.org/~hakzsam/drm/log/?h=nouveau_perfmon

The code of the software methods interface can be found here (two last commits):
http://cgit.freedesktop.org/~hakzsam/nouveau/log/?h=nouveau_perfmon

Another series which exposes global performance counters for Fermi and Kepler
will be submitted once I have got enough reviews for this one.

Feel free to review.

Thanks,
Samuel.

Samuel Pitoiset (7):
  nouveau: implement the nvif hardware performance counters interface
  nv50: allocate a software object class
  nv50: allocate and map a notifier buffer object for PM
  nv50: configure the ring buffer for reading back PM counters
  nv50: add support for compute/graphics global performance counters
  nv50: expose global performance counters to the HUD
  nv50: enable GL_AMD_performance_monitor

 src/gallium/drivers/nouveau/Makefile.sources   |2 +
 src/gallium/drivers/nouveau/nouveau_perfmon.c  |  290 ++
 src/gallium/drivers/nouveau/nouveau_perfmon.h  |   58 ++
 src/gallium/drivers/nouveau/nouveau_screen.c   |5 +
 src/gallium/drivers/nouveau/nouveau_screen.h   |1 +
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1142 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   55 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |   51 ++
 src/gallium/drivers/nouveau/nv50/nv50_winsys.h |1 +
 9 files changed, 1600 insertions(+), 5 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h

-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/7] nv50: allocate a software object class

2015-06-30 Thread Samuel Pitoiset
This will allow global performance counters to be monitored through the
command stream of the GPU instead of using ioctls.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_winsys.h |  1 +
 3 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6583a35..c985344 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->eng2d);
nouveau_object_del(&screen->m2mf);
nouveau_object_del(&screen->sync);
+   nouveau_object_del(&screen->sw);
 
nouveau_screen_fini(&screen->base);
 
@@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->tesla->handle);
 
+   BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->sw->handle);
+
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
 
@@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev)
   goto fail;
}
 
+   ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,
+NULL, 0, &screen->sw);
+   if (ret) {
+  NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret);
+  goto fail;
+   }
+
ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
 NULL, 0, &screen->m2mf);
if (ret) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b..69fdfdb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -93,6 +93,7 @@ struct nv50_screen {
struct nouveau_object *tesla;
struct nouveau_object *eng2d;
struct nouveau_object *m2mf;
+   struct nouveau_object *sw;
 };
 
 static INLINE struct nv50_screen *
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h 
b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index e8578c8..5cb33ef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo 
*bo, uint32_t flags)
 #define SUBC_COMPUTE(m) 6, (m)
 #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
 
+#define SUBC_SW(m) 7, (m)
 
 static INLINE uint32_t
 NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/7] nv50: configure the ring buffer for reading back PM counters

2015-06-30 Thread Samuel Pitoiset
To write data at the right offset, the kernel has to know some
parameters of this ring buffer, like the number of domains and the
maximum number of queries.

Changes since v1:
- only configure the ring buffer if the notifier BO is allocated
- only use one BEGIN_NV04()

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ab95d65..335bff1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -442,6 +442,16 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->sw->handle);
 
+   if (screen->query) {
+  /* Do not need to configure the ring buffer used to read back
+   * global performance counters when it is not allocated. */
+  BEGIN_NV04(push, SUBC_SW(0x0190), 1);
+  PUSH_DATA (push, screen->query->handle);
+  BEGIN_NV04(push, SUBC_SW(0x0600), 2);
+  PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES);
+  PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS);
+   }
+
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
 
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/7] nv50: expose global performance counters to the HUD

2015-06-30 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 41 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  3 ++
 3 files changed, 45 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 7fb6f3a..7dadb77 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -1532,6 +1532,47 @@ nv50_hw_pm_query_result(struct nv50_context *nv50, 
struct nv50_query *q,
return TRUE;
 }
 
+int
+nv50_screen_get_driver_query_info(struct pipe_screen *pscreen,
+  unsigned id,
+  struct pipe_driver_query_info *info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   // TODO: Check DRM version when nvif will be merged in libdrm!
+   if (screen->base.perfmon) {
+  nv50_identify_events(screen);
+  count += NV50_HW_PM_QUERY_COUNT;
+   }
+
+   if (!info)
+  return count;
+
+   /* Init default values. */
+   info->name = "this_is_not_the_query_you_are_looking_for";
+   info->query_type = 0xdeadd01d;
+   info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
+   info->max_value.u64 = 0;
+   info->group_id = -1;
+
+   if (id < count) {
+  if (screen->base.perfmon) {
+ const struct nv50_hw_pm_query_cfg *cfg =
+nv50_hw_pm_query_get_cfg(screen, NV50_HW_PM_QUERY(id));
+
+ info->name = cfg->event->name;
+ info->query_type = NV50_HW_PM_QUERY(id);
+ info->max_value.u64 =
+(cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0;
+ return 1;
+  }
+   }
+
+   /* User asked for info about non-existing query. */
+   return 0;
+}
+
 void
 nv50_init_query_functions(struct nv50_context *nv50)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 335bff1..ac1acd1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -748,6 +748,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_param = nv50_screen_get_param;
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
+   pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
 
nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 0449659..69127c0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -143,6 +143,9 @@ nv50_screen(struct pipe_screen *screen)
 #define NV50_HW_PM_QUERY_TEX_CACHE_HIT  22
 #define NV50_HW_PM_QUERY_TEX_WAITS_FOR_FB   23
 
+int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+  struct pipe_driver_query_info *);
+
 boolean nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/7] nv50: allocate and map a notifier buffer object for PM

2015-06-30 Thread Samuel Pitoiset
This notifier buffer object will be used to read back global performance
counters results written by the kernel.

For each domain, we will store the handle of the perfdom object, an
array of 4 counters and the number of cycles. Like the Gallium's HUD,
we keep a list of busy queries in a ring in order to prevent stalls
when reading queries.

Changes since v1:
- check return value of nouveau_bo_map()
- add a libdrm version check around creating the notifier BO

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 32 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 +
 2 files changed, 38 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index c985344..ab95d65 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_object_del(&screen->m2mf);
nouveau_object_del(&screen->sync);
nouveau_object_del(&screen->sw);
+   nouveau_object_del(&screen->query);
 
nouveau_screen_fini(&screen->base);
 
@@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev)
struct nv50_screen *screen;
struct pipe_screen *pscreen;
struct nouveau_object *chan;
+   struct nv04_fifo *fifo;
uint64_t value;
uint32_t tesla_class;
unsigned stack_size;
+   uint32_t length;
int ret;
 
screen = CALLOC_STRUCT(nv50_screen);
@@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev)
screen->base.pushbuf->rsvd_kick = 5;
 
chan = screen->base.channel;
+   fifo = chan->data;
 
pscreen->destroy = nv50_screen_destroy;
pscreen->context_create = nv50_create;
@@ -772,6 +776,26 @@ nv50_screen_create(struct nouveau_device *dev)
   goto fail;
}
 
+   // TODO: Update libdrm version when nvif will be merged!
+   if (dev->drm_version >= 0x01000101) {
+  /* Compute size (in bytes) of the notifier buffer object which is used
+   * in order to read back global performance counters results written
+   * by the kernel. For each domain, we store the handle of the perfdom
+   * object, an array of 4 counters and the number of cycles. Like for
+   * the Gallium's HUD, we keep a list of busy queries in a ring in order
+   * to prevent stalls when reading queries. */
+  length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) *
+ NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4;
+
+  ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS,
+   &(struct nv04_notify){ .length = length },
+   sizeof(struct nv04_notify), &screen->query);
+  if (ret) {
+  NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret);
+  goto fail;
+  }
+   }
+
ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,
 NULL, 0, &screen->sw);
if (ret) {
@@ -845,6 +869,14 @@ nv50_screen_create(struct nouveau_device *dev)
nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
 
+   ret = nouveau_bo_wrap(screen->base.device, fifo->notify, 
&screen->notify_bo);
+   if (ret == 0)
+  ret = nouveau_bo_map(screen->notify_bo, 0, screen->base.client);
+   if (ret) {
+  NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret);
+  goto fail;
+   }
+
nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
 
screen->TPs = util_bitcount(value & 0xffff);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 69fdfdb..71a5247 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -59,6 +59,7 @@ struct nv50_screen {
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *stack_bo;
struct nouveau_bo *tls_bo;
+   struct nouveau_bo *notify_bo;
 
unsigned TPs;
unsigned MPsInTP;
@@ -89,6 +90,7 @@ struct nv50_screen {
} fence;
 
struct nouveau_object *sync;
+   struct nouveau_object *query;
 
struct nouveau_object *tesla;
struct nouveau_object *eng2d;
@@ -96,6 +98,10 @@ struct nv50_screen {
struct nouveau_object *sw;
 };
 
+/* Parameters of the ring buffer used to read back global PM counters. */
+#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS 8
+#define NV50_HW_PM_RING_BUFFER_MAX_QUERIES 9 /* HUD_NUM_QUERIES + 1 */
+
 static INLINE struct nv50_screen *
 nv50_screen(struct pipe_screen *screen)
 {
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/7] nouveau: implement the nvif hardware performance counters interface

2015-06-30 Thread Samuel Pitoiset
This commit implements the base interface for hardware performance
counters that will be shared between nv50 and nvc0 drivers.

TODO: Bump the libdrm version required by Mesa once nvif is merged.

Changes since v1:
- remove double-query thing for domains, signals and sources

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources  |   2 +
 src/gallium/drivers/nouveau/nouveau_perfmon.c | 290 ++
 src/gallium/drivers/nouveau/nouveau_perfmon.h |  58 ++
 src/gallium/drivers/nouveau/nouveau_screen.c  |   5 +
 src/gallium/drivers/nouveau/nouveau_screen.h  |   1 +
 5 files changed, 356 insertions(+)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c
 create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 3fae3bc..3da0bdc 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -10,6 +10,8 @@ C_SOURCES := \
nouveau_heap.h \
nouveau_mm.c \
nouveau_mm.h \
+   nouveau_perfmon.c \
+   nouveau_perfmon.h \
nouveau_screen.c \
nouveau_screen.h \
nouveau_statebuf.h \
diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c 
b/src/gallium/drivers/nouveau/nouveau_perfmon.c
new file mode 100644
index 0000000..e1d4546
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+
+#include "util/u_memory.h"
+
+#include "nouveau_debug.h"
+#include "nouveau_winsys.h"
+#include "nouveau_perfmon.h"
+
+static int
+nouveau_perfmon_query_sources(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom,
+  struct nouveau_perfmon_sig *sig)
+{
+   struct nvif_perfmon_query_source_v0 args = {};
+
+   args.iter   = 1;
+   args.domain = dom->id;
+   args.signal = sig->signal;
+   do {
+  struct nouveau_perfmon_src *src;
+  int ret;
+
+  ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SOURCE,
+&args, sizeof(args));
+  if (ret)
+ return ret;
+
+  src = CALLOC_STRUCT(nouveau_perfmon_src);
+  if (!src)
+ return -ENOMEM;
+
+#if 0
+  debug_printf("id   = %d\n", args.source);
+  debug_printf("name = %s\n", args.name);
+  debug_printf("mask = %08x\n", args.mask);
+  debug_printf("\n");
+#endif
+
+  src->id = args.source;
+  strncpy(src->name, args.name, sizeof(src->name));
+  list_addtail(&src->head, &sig->sources);
+   } while (args.iter != 0xff);
+
+   return 0;
+}
+
+static int
+nouveau_perfmon_query_signals(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom)
+{
+   struct nvif_perfmon_query_signal_v0 args = {};
+
+   args.iter   = 1;
+   args.domain = dom->id;
+   do {
+  struct nouveau_perfmon_sig *sig;
+  int ret;
+
+  ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SIGNAL,
+&args, sizeof(args));
+  if (ret)
+ return ret;
+
+  sig = CALLOC_STRUCT(nouveau_perfmon_sig);
+  if (!sig)
+ return -ENOMEM;
+  list_inithead(&sig->sources);
+
+#if 0
+  debug_printf("name  = %s\n", args.name);
+  debug_printf("signal= 0x%02x\n", args.signal);
+  debug_printf("source_nr = %d\n", args.source_nr);
+  debug_printf("\n");
+#endif
+
+  sig->signal = args.signal;
+  strncpy(sig->name, args.name, sizeof(sig->name));
+  list_addtail(&sig->head, &dom->signals);
+
+  /* Q

[Mesa-dev] [PATCH v2 7/7] nv50: enable GL_AMD_performance_monitor

2015-06-30 Thread Samuel Pitoiset
This exposes a group of global performance counters that enables
GL_AMD_performance_monitor. All piglit tests are okay.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 35 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 +
 3 files changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 7dadb77..6d57305 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -1563,6 +1563,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
 
  info->name = cfg->event->name;
  info->query_type = NV50_HW_PM_QUERY(id);
+ info->group_id = NV50_HW_PM_QUERY_GROUP;
  info->max_value.u64 =
 (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0;
  return 1;
@@ -1573,6 +1574,40 @@ nv50_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
return 0;
 }
 
+int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+unsigned id,
+struct pipe_driver_query_group_info 
*info)
+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   // TODO: Check DRM version when nvif will be merged in libdrm!
+   if (screen->base.perfmon) {
+  count++; /* NV50_HW_PM_QUERY_GROUP */
+   }
+
+   if (!info)
+  return count;
+
+   if (id == NV50_HW_PM_QUERY_GROUP) {
+  if (screen->base.perfmon) {
+ info->name = "Global performance counters";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ info->num_queries = NV50_HW_PM_QUERY_COUNT;
+ info->max_active_queries = 1; /* TODO: get rid of this limitation! */
+ return 1;
+  }
+   }
+
+   /* user asked for info about non-existing query group */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   info->type = 0;
+   return 0;
+}
+
 void
 nv50_init_query_functions(struct nv50_context *nv50)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ac1acd1..05f921d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -749,6 +749,7 @@ nv50_screen_create(struct nouveau_device *dev)
pscreen->get_shader_param = nv50_screen_get_shader_param;
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+   pscreen->get_driver_query_group_info = 
nv50_screen_get_driver_query_group_info;
 
nv50_screen_init_resource_functions(pscreen);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 69127c0..807ae0e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
 }
 
+/* Hardware global performance counters groups. */
+#define NV50_HW_PM_QUERY_GROUP 0
+
 /* Hardware global performance counters. */
 #define NV50_HW_PM_QUERY_COUNT  24
 #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i))
@@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen)
 int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
   struct pipe_driver_query_info *);
 
+int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
+struct 
pipe_driver_query_group_info *);
+
 boolean nv50_blitter_create(struct nv50_screen *);
 void nv50_blitter_destroy(struct nv50_screen *);
 
-- 
2.4.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/7] nv50: add support for compute/graphics global performance counters

2015-06-30 Thread Samuel Pitoiset
This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD is not a good fit for this. This will be improved later.

Changes since v1:
- replace \% by percentage
- remove one extra call to PUSH_SPACE
- use nouveau_fence instead of my hand-made fence mechanism

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1066 +++-
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
 2 files changed, 1096 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 81f7474..7fb6f3a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
 #include "nv50/nv50_context.h"
 #include "nv_object.xml.h"
 
+#include "nouveau_perfmon.h"
+
 #define NV50_QUERY_STATE_READY   0
 #define NV50_QUERY_STATE_ACTIVE  1
 #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
boolean is64bit;
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
 };
 
 #define NV50_QUERY_ALLOC_SPACE 256
 
+#ifdef DEBUG
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *);
+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, void *);
+
 static INLINE struct nv50_query *
 nv50_query(struct pipe_query *pipe)
 {
@@ -96,9 +113,15 @@ nv50_query_allocate(struct nv50_context *nv50, struct 
nv50_query *q, int size)
 static void
 nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
 {
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST))
+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
 }
 
 static struct pipe_query *
@@ -120,6 +143,12 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
  type == PIPE_QUERY_PRIMITIVES_EMITTED ||
  type == PIPE_QUERY_SO_STATISTICS ||
  type == PIPE_QUERY_PIPELINE_STATISTICS);
+   if (type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST) {
+  /* Hardware global performance counters are not 64 bits, but we also use
+   * a fence to make sure the query is ready. */
+  q->is64bit = TRUE;
+   }
+
q->type = type;
 
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -127,6 +156,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned 
type, unsigned index)
   q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
}
 
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) {
+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
return (struct pipe_query *)q;
 }
 
@@ -151,6 +185,7 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;
 
/* For occlusion queries we have to change the storage, because a previous
 * query might set the initial render conition to FALSE even *after* we re-
@@ -205,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
   nv50_query_get(push, q, 0x10, 0x5002);
   break;
default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ ret = nv50_hw_pm_query_begin(nv50, q);
+  }
   break;
}
q->state = NV50_QUERY_STATE_ACTIVE;
-   return true;
+   return ret;
 }
 
 static void
@@ -265,7 +303,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query 
*pq)
   q->state = NV50_QUERY_STATE_READY;
   break;
default:
-  assert(0);
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {
+ nv50_hw_pm_query_end(nv50, q);
+  }
   break;
}
 
@@ -30

[Mesa-dev] [PATCH] nvc0: fix wrong use of BLIT_SRC_Y_INT for 2D texture copy

2015-07-06 Thread Samuel Pitoiset
According to nv50, this should be src->ms_y instead of src->ms_x. This
code has been there since 2012, so it is probably a typo that has gone
undetected for a long time. I didn't do a full piglit run to check
whether it fixes any other weird issues.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index a820de7..53cd8cd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -189,7 +189,7 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
PUSH_DATA (push, 0);
PUSH_DATA (push, sx << src->ms_x);
PUSH_DATA (push, 0);
-   PUSH_DATA (push, sy << src->ms_x);
+   PUSH_DATA (push, sy << src->ms_y);
 
return 0;
 }
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: fix a SIGSEGV with piglit bin/gl-3.1-vao-broken-attrib

2015-07-06 Thread Samuel Pitoiset
Before validating vertex arrays we need to check if a VBO is present.
Checking if vb->buffer is not NULL fixes the issue.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 1fd33b8..3d200bd 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -382,6 +382,11 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
   if (nv50->vbo_user & (1 << b)) {
  address = addrs[b] + ve->pipe.src_offset;
  limit = addrs[b] + limits[b];
+  } else
+  if (!vb->buffer) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ continue;
   } else {
  struct nv04_resource *buf = nv04_resource(vb->buffer);
  if (!(refd & (1 << b))) {
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params

2015-07-13 Thread Samuel Pitoiset
What piglit test does this fix?

On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin  wrote:

> Signed-off-by: Ilia Mirkin 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>
> Even though in practice a geometry program will never be using UCP's,
> we still were revalidating (aka recompiling) the program when more
> clip planes became enabled (which also are used for regular clip
> distances).
>
> This seems like it should have led to massive fail, but I guess you
> don't change the number of clip planes when using geometry shaders.
> But I'm going to put this through a full piglit run just in case
> there's something I'm missing.
>
>  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index 785e52e..11f2b10 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
>nvc0_vertprog_validate(nvc0);
> else
> if (likely(vp == nvc0->gmtyprog))
> -  nvc0_vertprog_validate(nvc0);
> +  nvc0_gmtyprog_validate(nvc0);
> else
>nvc0_tevlprog_validate(nvc0);
>  }
> --
> 2.3.6
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>



-- 
Best regards,
Samuel Pitoiset.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nv50: turn samples counts off during blit

2015-07-13 Thread Samuel Pitoiset
Fixes the following piglit test:
  occlusion_query_meta_no_fragments

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_surface.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c 
b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index dc9852d..66eccc2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -1432,6 +1432,7 @@ static void
 nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
 {
struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
boolean eng3d = FALSE;
 
if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
@@ -1493,10 +1494,20 @@ nv50_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *info)
 info->src.box.height != -info->dst.box.height))
   eng3d = TRUE;
 
+   if (nv50->screen->num_occlusion_queries_active) {
+  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+  PUSH_DATA (push, 0);
+   }
+
if (!eng3d)
   nv50_blit_eng2d(nv50, info);
else
   nv50_blit_3d(nv50, info);
+
+   if (nv50->screen->num_occlusion_queries_active) {
+  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+  PUSH_DATA (push, 1);
+   }
 }
 
 static void
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nv50: add nesting support for occlusion queries

2015-07-13 Thread Samuel Pitoiset
This is loosely based on nvc0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 27 --
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  2 ++
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 81f7474..80d3fd2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -49,6 +49,7 @@ struct nv50_query {
uint32_t offset; /* base + i * 32 */
uint8_t state;
boolean is64bit;
+   int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
 };
@@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
 
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
-  PUSH_SPACE(push, 4);
-  BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
-  PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
-  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-  PUSH_DATA (push, 1);
+  q->nesting = nv50->screen->num_occlusion_queries_active++;
+  if (q->nesting) {
+ nv50_query_get(push, q, 0x10, 0x0100f002);
+  } else {
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+  }
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
   nv50_query_get(push, q, 0x10, 0x06805002);
@@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct 
pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
   nv50_query_get(push, q, 0, 0x0100f002);
-  PUSH_SPACE(push, 2);
-  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-  PUSH_DATA (push, 0);
+  if (--nv50->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+  }
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
   nv50_query_get(push, q, 0, 0x06805002);
@@ -396,8 +404,7 @@ nv50_render_condition(struct pipe_context *pipe,
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
  if (likely(!condition)) {
-/* XXX: Placeholder, handle nesting here if available */
-if (unlikely(false))
+if (unlikely(q->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
  NV50_3D_COND_MODE_ALWAYS;
 else
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b..3a12a1f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -54,6 +54,8 @@ struct nv50_screen {
struct nv50_context *cur_ctx;
struct nv50_graph_state save_state;
 
+   int num_occlusion_queries_active;
+
struct nouveau_bo *code;
struct nouveau_bo *uniforms;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: limit the maximum number of samplers to 16

2015-07-13 Thread Samuel Pitoiset
NV50_3D_BIND_TSC only allows binding 16 samplers, and since we don't
want to do anything with NV50_3D_BIND_TSC2, just limit the maximum
number of samplers to 16 like for nvc0.

This fixes the dmesg failures with the following piglit test:
 max-samplers

But the test still fails.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6583a35..46ae0b8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -286,7 +286,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, 
unsigned shader,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
   /* The chip could handle more sampler views than samplers */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-  return MIN2(32, PIPE_MAX_SAMPLERS);
+  return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nv50: add nesting support for occlusion queries

2015-07-13 Thread Samuel Pitoiset
This is loosely based on nvc0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 29 --
 src/gallium/drivers/nouveau/nv50/nv50_screen.h |  2 ++
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 81f7474..a5b95c1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -49,6 +49,7 @@ struct nv50_query {
uint32_t offset; /* base + i * 32 */
uint8_t state;
boolean is64bit;
+   int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
 };
@@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct 
pipe_query *pq)
 
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
-  PUSH_SPACE(push, 4);
-  BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
-  PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
-  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-  PUSH_DATA (push, 1);
+  q->nesting = nv50->screen->num_occlusion_queries_active++;
+  if (q->nesting) {
+ nv50_query_get(push, q, 0x10, 0x0100f002);
+  } else {
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+  }
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
   nv50_query_get(push, q, 0x10, 0x06805002);
@@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct 
pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
   nv50_query_get(push, q, 0, 0x0100f002);
-  PUSH_SPACE(push, 2);
-  BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
-  PUSH_DATA (push, 0);
+  if (--nv50->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+  }
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
   nv50_query_get(push, q, 0, 0x06805002);
@@ -319,7 +327,7 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
   res8[0] = TRUE;
   break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
-  res64[0] = q->data[1];
+  res64[0] = q->data[1] - q->data[5];
   break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
@@ -396,8 +404,7 @@ nv50_render_condition(struct pipe_context *pipe,
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
  if (likely(!condition)) {
-/* XXX: Placeholder, handle nesting here if available */
-if (unlikely(false))
+if (unlikely(q->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
  NV50_3D_COND_MODE_ALWAYS;
 else
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b..3a12a1f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -54,6 +54,8 @@ struct nv50_screen {
struct nv50_context *cur_ctx;
struct nv50_graph_state save_state;
 
+   int num_occlusion_queries_active;
+
struct nouveau_bo *code;
struct nouveau_bo *uniforms;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nv50/nvc0: force cache flush for constbufs

2015-07-14 Thread Samuel Pitoiset
This fixes the following piglit test:
  ext_transform_feedback-immediate-reuse-uniform-buffer

I didn't test on nvc0 but this should work as expected.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_shader_state.c   | 2 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index c698782..932d1c3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -108,6 +108,8 @@ nv50_constbufs_validate(struct nv50_context *nv50)
  }
   }
}
+
+   nv50->cb_dirty = 1; /* Force cache flush for constbufs */
 }
 
 static boolean
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 785e52e..f8a30f2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -464,6 +464,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
  }
   }
}
+
+   nvc0->cb_dirty = 1; /* Force cache flush for constbufs */
 }
 
 static void
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nouveau: always align buffers to 0x100

2015-07-14 Thread Samuel Pitoiset
Only constbufs must be aligned to 0x100, but since a TFB buffer can be
rebound as a constant buffer, it must also be aligned.

This patch avoids the problem by aligning everything to 256-byte
increments at buffer creation.

This fixes the dmesg failures for the following piglit test:
  ext_transform_feedback-immediate-reuse-uniform-buffer -auto -fbo

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nouveau_buffer.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c 
b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 09cdbb5..83d5288 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -40,12 +40,7 @@ static INLINE boolean
 nouveau_buffer_allocate(struct nouveau_screen *screen,
 struct nv04_resource *buf, unsigned domain)
 {
-   uint32_t size = buf->base.width0;
-
-   if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_COMPUTE_RESOURCE |
- PIPE_BIND_SHADER_RESOURCE))
-  size = align(size, 0x100);
+   uint32_t size = align(buf->base.width0, 0x100);
 
if (domain == NOUVEAU_BO_VRAM) {
   buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: add a missing parameter to nvc0_set_shader_images()

2015-07-16 Thread Samuel Pitoiset
This fixes a compilation warning introduced in commit 05a12c5
(gallium: add interface for writable shader images).

While we are at it, fix indentation and rename parameters according to
the gallium interface.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 337559c..d18b064 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1125,9 +1125,9 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
 }
 
 static void
-nvc0_set_shader_images(struct pipe_context *pipe,
-  unsigned start, unsigned nr,
-  struct pipe_image_view **views)
+nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
+   unsigned start_slot, unsigned count,
+   struct pipe_image_view **views)
 {
 #if 0
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH] nv50: adjust min/max lod by base level on G80

2015-07-20 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 07/20/2015 09:26 AM, Ilia Mirkin wrote:

Make the assumption that there's a 1:1 TIC <-> TSC connection, and
increase min/max lod by the relevant texture's base level. Also if
there's no mipfilter, we have to enable it while forcing min/max lod to
the base level.

This fixes many, but not all, tex-miplevel-selection tests on G80.

Signed-off-by: Ilia Mirkin 
---

All the textureLod tests fail. If I also adjust the lod_bias by the
first_level, then the regular tests start failing.

Not sure what the right move is here... need to trace the blob to see
what it does here.

  src/gallium/drivers/nouveau/nv50/nv50_state.c  |  1 +
  .../drivers/nouveau/nv50/nv50_stateobj_tex.h   |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_tex.c| 39 ++
  3 files changed, 41 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d4d41af..98c4c3a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -464,6 +464,7 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry);
 float f[2];
  
+   so->pipe = *cso;

 so->id = -1;
  
 so->tsc[0] = (0x00026000 |

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h 
b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
index 99548cb..9a19166 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -5,6 +5,7 @@
  #include "pipe/p_state.h"
  
  struct nv50_tsc_entry {

+   struct pipe_sampler_state pipe;
 int id;
 uint32_t tsc[8];
  };
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c 
b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index 17ae27f..d79c813 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -344,6 +344,45 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
   PUSH_DATA (push, (i << 4) | 0);
   continue;
}
+  if (nv50->base.screen->class_3d == NV50_3D_CLASS) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
+
+ /* We must make sure that the MIN_LOD is at least set to the first
+  * level for the G80
+  */
+ bool need_update = false;
+ float min_lod = CLAMP(
+   tic->pipe.u.tex.first_level + tsc->pipe.min_lod, 0.0f, 15.0f);
+ float max_lod = CLAMP(
+   tic->pipe.u.tex.first_level + tsc->pipe.max_lod, 0.0f, 15.0f);
+
+ if (tsc->pipe.min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+uint32_t old_tsc1 = tsc->tsc[1];
+tsc->tsc[1] &= ~NV50_TSC_1_MIPF__MASK;
+if (tic->pipe.u.tex.first_level) {
+   tsc->tsc[1] |= NV50_TSC_1_MIPF_NEAREST;
+   max_lod = min_lod = tic->pipe.u.tex.first_level;
+}
+if (tsc->tsc[1] != old_tsc1)
+   need_update = true;
+ }
+
+ uint32_t new_tsc2 =
+(((int)(max_lod * 256.0f) & 0xfff) << 12) |
+((int)(min_lod * 256.0f) & 0xfff);
+ if ((tsc->tsc[2] & 0xff) != new_tsc2) {
+tsc->tsc[2] &= ~0xffu;
+tsc->tsc[2] |= new_tsc2;
+need_update = true;
+ }
+
+ if (need_update && tsc->id >= 0) {
+nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
+65536 + tsc->id * 32,
+NOUVEAU_BO_VRAM, 32, tsc->tsc);
+need_flush = TRUE;
+ }
+  }
if (tsc->id < 0) {
   tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc);
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: force cache flush for Uniform Buffer Objects

2015-07-21 Thread Samuel Pitoiset
This fixes the following piglit test:
  ext_transform_feedback-immediate-reuse-uniform-buffer

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 1ec5642..9369093 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -99,6 +99,8 @@ nv50_constbufs_validate(struct nv50_context *nv50)
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
 
BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+
+   nv50->cb_dirty = 1; /* Force cache flush for UBO. */
 } else {
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (i << 8) | p | 0);
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: force cache flush when binding a new ubo

2015-07-21 Thread Samuel Pitoiset
This fixes the following piglit test:
  ext_transform_feedback-immediate-reuse-uniform-buffer

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index b07558a..2428314 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -455,6 +455,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, (i << 4) | 1);
 
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+
+   nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
 } else {
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
-- 
2.4.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/7] nouveau: implement the nvif hardware performance counters interface

2015-07-23 Thread Samuel Pitoiset



On 07/22/2015 10:29 PM, Martin Peres wrote:



On 01/07/15 01:01, Samuel Pitoiset wrote:

This commit implements the base interface for hardware performance
counters that will be shared between nv50 and nvc0 drivers.

TODO: Bump the libdrm version required by mesa once nvif is merged.

Changes since v2:
- remove double-query thing for domains, signals and sources

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/Makefile.sources  |   2 +
  src/gallium/drivers/nouveau/nouveau_perfmon.c | 290 
++

  src/gallium/drivers/nouveau/nouveau_perfmon.h |  58 ++
  src/gallium/drivers/nouveau/nouveau_screen.c  |   5 +
  src/gallium/drivers/nouveau/nouveau_screen.h  |   1 +
  5 files changed, 356 insertions(+)
  create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c
  create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources

index 3fae3bc..3da0bdc 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -10,6 +10,8 @@ C_SOURCES := \
  nouveau_heap.h \
  nouveau_mm.c \
  nouveau_mm.h \
+nouveau_perfmon.c \
+nouveau_perfmon.h \
  nouveau_screen.c \
  nouveau_screen.h \
  nouveau_statebuf.h \
diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c 
b/src/gallium/drivers/nouveau/nouveau_perfmon.c

new file mode 100644
index 000..e1d4546
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom 
the

+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
included in

+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 
EVENT SHALL

+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
USE OR

+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+
+#include "util/u_memory.h"
+
+#include "nouveau_debug.h"
+#include "nouveau_winsys.h"
+#include "nouveau_perfmon.h"
+
+static int
+nouveau_perfmon_query_sources(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom,
+  struct nouveau_perfmon_sig *sig)
+{
+   struct nvif_perfmon_query_source_v0 args = {};
+
+   args.iter   = 1;


Why start iterating from 1 and not 0?


Starting from 1 will give you the first source of the signal (because the
kernel side subtracts 1 from the iterator).





+   args.domain = dom->id;
+   args.signal = sig->signal;
+   do {
+  struct nouveau_perfmon_src *src;
+  int ret;
+
+  ret = nouveau_object_mthd(pm->object, 
NVIF_PERFMON_V0_QUERY_SOURCE,

+&args, sizeof(args));
+  if (ret)
+ return ret;


 You do not check what happens if you do not expose any source for 
this signal. A test on args.iter != 0xff with a return if not the 
case would be nice!


If no sources are exposed for a signal, it will return -EINVAL.
But we don't care to handle this because we check if a signal exposes 
sources before querying them. (cf. perfmon_query_signals()).





+
+  src = CALLOC_STRUCT(nouveau_perfmon_src);
+  if (!src)
+ return -ENOMEM;
+
+#if 0
+  debug_printf("id   = %d\n", args.source);
+  debug_printf("name = %s\n", args.name);
+  debug_printf("mask = %08x\n", args.mask);
+  debug_printf("\n");
+#endif
+
+  src->id = args.source;
+  strncpy(src->name, args.name, sizeof(src->name));
+  list_addtail(&src->head, &sig->sources);
+   } while (args.iter != 0xff);
+
+   return 0;
+}
+
+static int
+nouveau_perfmon_query_signals(struct nouveau_perfmon *pm,
+  struct nouveau_perfmon_dom *dom)
+{
+   struct nvif_perfmon_query_signal_v0 args = {};
+
+   args.iter   = 1;
+   args.domain = dom->id;
+   do {
+  struct nouveau_perfmon_sig *sig;
+  int ret;
+
+  ret = nouveau_object_mthd(pm->object, 
NVIF_PERFMON_V0_QUERY_SIGNAL,

+&

Re: [Mesa-dev] [PATCH v2 2/7] nv50: allocate a software object class

2015-07-23 Thread Samuel Pitoiset



On 07/22/2015 10:35 PM, Martin Peres wrote:

On 01/07/15 01:01, Samuel Pitoiset wrote:

This will allow monitoring global performance counters through the
command stream of the GPU instead of using ioctls.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_winsys.h |  1 +
  3 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

index 6583a35..c985344 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 nouveau_object_del(&screen->eng2d);
 nouveau_object_del(&screen->m2mf);
 nouveau_object_del(&screen->sync);
+   nouveau_object_del(&screen->sw);
   nouveau_screen_fini(&screen->base);
  @@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
 BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
 PUSH_DATA (push, screen->tesla->handle);
  +   BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+   PUSH_DATA (push, screen->sw->handle);
+
 BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
 PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
  @@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev)
goto fail;
 }
  +   ret = nouveau_object_new(chan, 0xbeef506e, 0x506e,

I guess the 0x506e needs to be defined in libdrm, right?


Judging by nvc0, the handle is not defined in libdrm.
I don't think there is any need to do that, is there?



Other than that, it is Reviewed-by: Martin Peres 

+NULL, 0, &screen->sw);
+   if (ret) {
+  NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret);
+  goto fail;
+   }
+
 ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
  NULL, 0, &screen->m2mf);
 if (ret) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h

index 881051b..69fdfdb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -93,6 +93,7 @@ struct nv50_screen {
 struct nouveau_object *tesla;
 struct nouveau_object *eng2d;
 struct nouveau_object *m2mf;
+   struct nouveau_object *sw;
  };
static INLINE struct nv50_screen *
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h 
b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h

index e8578c8..5cb33ef 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct 
nouveau_bo *bo, uint32_t flags)

  #define SUBC_COMPUTE(m) 6, (m)
  #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
  +#define SUBC_SW(m) 7, (m)
static INLINE uint32_t
  NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/7] nv50: configure the ring buffer for reading back PM counters

2015-07-23 Thread Samuel Pitoiset



On 07/22/2015 10:54 PM, Martin Peres wrote:



On 01/07/15 01:01, Samuel Pitoiset wrote:

To write data at the right offset, the kernel has to know some
parameters of this ring buffer, like the number of domains and the
maximum number of queries.

Changes since v2:
- only configure the ring buffer if the notifier BO is allocated
- only use one BEGIN_NV04()

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

index ab95d65..335bff1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -442,6 +442,16 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
 BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
 PUSH_DATA (push, screen->sw->handle);
  +   if (screen->query) {
+  /* Do not need to configure the ring buffer used to read back
+   * global performance counters when it is not allocated. */
+  BEGIN_NV04(push, SUBC_SW(0x0190), 1);
+  PUSH_DATA (push, screen->query->handle);
+  BEGIN_NV04(push, SUBC_SW(0x0600), 2);


Shouldn't we have the sw method's handle defined in libdrm?


Same as patch 2. No need to do it I think.



Anyway, patches 3 and 4 are

Reviewed-by: Martin Peres 

+  PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES);
+  PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS);
+   }
+
 BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
 PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 5/7] nv50: add support for compute/graphics global performance counters

2015-07-23 Thread Samuel Pitoiset



On 07/23/2015 12:05 AM, Martin Peres wrote:

On 01/07/15 01:01, Samuel Pitoiset wrote:

This commit adds support for both compute and graphics global
performance counters which have been reverse engineered with
CUPTI (Linux) and PerfKit (Windows).

Currently, only one query type can be monitored at a time because
Gallium's HUD doesn't fit this use case very well. This will be improved later.

Changes since v2:
- replace \% by percentage
- remove one extra call to PUSH_SPACE
- use nouveau_fence instead of my hand-made fence mechanism

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 1066 
+++-

  src/gallium/drivers/nouveau/nv50/nv50_screen.h |   35 +
  2 files changed, 1096 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c

index 81f7474..7fb6f3a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -27,6 +27,8 @@
  #include "nv50/nv50_context.h"
  #include "nv_object.xml.h"
  +#include "nouveau_perfmon.h"
+
  #define NV50_QUERY_STATE_READY   0
  #define NV50_QUERY_STATE_ACTIVE  1
  #define NV50_QUERY_STATE_ENDED   2
@@ -51,10 +53,25 @@ struct nv50_query {
 boolean is64bit;
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
+   struct nouveau_object *perfdom;
  };
#define NV50_QUERY_ALLOC_SPACE 256
  +#ifdef DEBUG
No need to guard the definition of this function. The compiler will 
get rid of it if it has no users.


Fixed.


+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args);
+#endif
+
+static boolean
+nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *);
+static void
+nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *);
+static boolean
+nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *);
+static void nv50_hw_pm_query_end(struct nv50_context *, struct 
nv50_query *);

+static boolean nv50_hw_pm_query_result(struct nv50_context *,
+struct nv50_query *, boolean, 
void *);

+
  static INLINE struct nv50_query *
  nv50_query(struct pipe_query *pipe)
  {
@@ -96,9 +113,15 @@ nv50_query_allocate(struct nv50_context *nv50, 
struct nv50_query *q, int size)

  static void
  nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
  {
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_query *q = nv50_query(pq);
+
+   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST))

+  nv50_hw_pm_query_destroy(nv50, q);
+
+   nv50_query_allocate(nv50, q, 0);
+   nouveau_fence_ref(NULL, &q->fence);
+   FREE(q);
  }
static struct pipe_query *
@@ -120,6 +143,12 @@ nv50_query_create(struct pipe_context *pipe, 
unsigned type, unsigned index)

   type == PIPE_QUERY_PRIMITIVES_EMITTED ||
   type == PIPE_QUERY_SO_STATISTICS ||
   type == PIPE_QUERY_PIPELINE_STATISTICS);
+   if (type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST) {
+  /* Hardware global performance counters are not 64 bits, but 
we also use

+   * a fence to make sure the query is ready. */


I do not understand the logic of this comment.


Only 64-bit queries use a nouveau_fence to make sure the result is 
available; 32-bit queries use a hand-made sequence number.
Global PM queries are declared as 32-bit queries, but we also use a 
nouveau_fence to check the result.

I'll rewrite that comment.


+  q->is64bit = TRUE;
+   }
+
 q->type = type;
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
@@ -127,6 +156,11 @@ nv50_query_create(struct pipe_context *pipe, 
unsigned type, unsigned index)
q->data -= 32 / sizeof(*q->data); /* we advance before 
query_begin ! */

 }
  +   if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) {

+  if (!nv50_hw_pm_query_create(nv50, q))
+ return NULL;
+   }
+
 return (struct pipe_query *)q;
  }
  @@ -151,6 +185,7 @@ nv50_query_begin(struct pipe_context *pipe, 
struct pipe_query *pq)

 struct nv50_context *nv50 = nv50_context(pipe);
 struct nouveau_pushbuf *push = nv50->base.pushbuf;
 struct nv50_query *q = nv50_query(pq);
+   boolean ret = TRUE;
   /* For occlusion queries we have to change the storage, 
because a previous
  * query might set the initial render conition to FALSE even 
*after* we re-
@@ -205,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, 
struct pipe_query *pq)

nv50_query_get(push, q, 0x10, 0x5002);
break;
 default:
+  if ((q->type >= NV50_HW_PM_QUERY(0) && q->type

Re: [Mesa-dev] [PATCH v2 7/7] nv50: enable GL_AMD_performance_monitor

2015-07-23 Thread Samuel Pitoiset



On 07/23/2015 12:14 AM, Martin Peres wrote:

On 01/07/15 01:01, Samuel Pitoiset wrote:

This exposes a group of global performance counters that enables
GL_AMD_performance_monitor. All piglit tests are okay.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 35 
++

  src/gallium/drivers/nouveau/nv50/nv50_screen.c |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_screen.h |  6 +
  3 files changed, 42 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c

index 7dadb77..6d57305 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -1563,6 +1563,7 @@ nv50_screen_get_driver_query_info(struct 
pipe_screen *pscreen,

 info->name = cfg->event->name;
   info->query_type = NV50_HW_PM_QUERY(id);
+ info->group_id = NV50_HW_PM_QUERY_GROUP;
   info->max_value.u64 =
  (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) 
? 100 : 0;

   return 1;
@@ -1573,6 +1574,40 @@ nv50_screen_get_driver_query_info(struct 
pipe_screen *pscreen,

 return 0;
  }
  +int
+nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
+unsigned id,
+struct 
pipe_driver_query_group_info *info)

+{
+   struct nv50_screen *screen = nv50_screen(pscreen);
+   int count = 0;
+
+   // TODO: Check DRM version when nvif will be merged in libdrm!
+   if (screen->base.perfmon) {
+  count++; /* NV50_HW_PM_QUERY_GROUP */
+   }
+
+   if (!info)
+  return count;
+
+   if (id == NV50_HW_PM_QUERY_GROUP) {
+  if (screen->base.perfmon) {
+ info->name = "Global performance counters";
+ info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+ info->num_queries = NV50_HW_PM_QUERY_COUNT;
+ info->max_active_queries = 1; /* TODO: get rid of this 
limitation! */

+ return 1;
+  }
+   }
+
+   /* user asked for info about non-existing query group */
+   info->name = "this_is_not_the_query_group_you_are_looking_for";
+   info->max_active_queries = 0;
+   info->num_queries = 0;
+   info->type = 0;
+   return 0;
+}
+
  void
  nv50_init_query_functions(struct nv50_context *nv50)
  {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

index ac1acd1..05f921d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -749,6 +749,7 @@ nv50_screen_create(struct nouveau_device *dev)
 pscreen->get_shader_param = nv50_screen_get_shader_param;
 pscreen->get_paramf = nv50_screen_get_paramf;
 pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
+   pscreen->get_driver_query_group_info = 
nv50_screen_get_driver_query_group_info;

   nv50_screen_init_resource_functions(pscreen);
  diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h

index 69127c0..807ae0e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen)
 return (struct nv50_screen *)screen;
  }
  +/* Hardware global performance counters groups. */
+#define NV50_HW_PM_QUERY_GROUP 0
+
  /* Hardware global performance counters. */
  #define NV50_HW_PM_QUERY_COUNT  24
  #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i))
@@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen)
  int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned,
struct pipe_driver_query_info 
*);
  +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, 
unsigned,
+struct 
pipe_driver_query_group_info *);

+
  boolean nv50_blitter_create(struct nv50_screen *);
  void nv50_blitter_destroy(struct nv50_screen *);

Everything looks good to me! Excellent work Samuel!

Reviewed-by: Martin Peres 


Thanks for reviewing the whole series, Martin.



IIRC, the kernel patches were supposed to land in 4.2; as there was no 
pull request from Ben, they will likely end up in 4.3. As for the libdrm 
patches, did you review them? Looking forward to seeing this series 
merged!


Yeah, I hope they will be merged in 4.3. Anyway, Ben still has to 
review the software methods interface and add nvif support to libdrm.

I'll ping him in the next few days. :)



It would also be nice to start a discussion to rework the Gallium HUD 
to avoid the stupid problem that we have where we can only monitor one 
signal at a time!


Yeah, this is going to be a bit hard to rework, but it is required to 
monitor more than one global hardware performance counter at a time.



Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Samuel Pitoiset



On 10/10/2015 09:58 PM, Ilia Mirkin wrote:

On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset
 wrote:


On 10/10/2015 09:42 PM, Ilia Mirkin wrote:

On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
 wrote:

This patch looks fine except that it should be a bit more normalized. I
mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for
PUSH_SPACE calls, sometimes you add it sometimes not.

Meh. We need to get our error checking situation straight, but this
isn't the patch to do it in.


Yeah, but this needs to be clarified.

What does?


I mean, we should either use PUSH_SPACE everywhere or not at all, and 
always break (or never break) when PUSH_SPACE fails.

That's really a minor issue.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-10 Thread Samuel Pitoiset



On 10/10/2015 10:17 PM, Ilia Mirkin wrote:

On Sat, Oct 10, 2015 at 4:21 PM, Samuel Pitoiset
 wrote:


On 10/10/2015 09:58 PM, Ilia Mirkin wrote:

On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset
 wrote:


On 10/10/2015 09:42 PM, Ilia Mirkin wrote:

On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset
 wrote:

This patch looks fine except that it should be a bit more normalized. I
mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same
for
PUSH_SPACE calls, sometimes you add it sometimes not.

Meh. We need to get our error checking situation straight, but this
isn't the patch to do it in.


Yeah, but this needs to be clarified.

What does?


I mean, we should either use PUSH_SPACE everywhere or not at all, and always
break (or never break) when PUSH_SPACE fails.
That's really a minor issue.

It's actually a major issue. Error-handling is practically
non-existent. There are a couple of spots here and there, but it
doesn't really scale up. I guess I (semi-)accidentally removed a
couple of spots that error checked, but, again, meh. Doing this for
real will require some careful thought.


Yeah, okay. So we really need to improve error-handling. :)


   -ilia


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space

2015-10-11 Thread Samuel Pitoiset
I did a full piglit run on Fermi. There are no regressions, and you fixed 
the texelFetch tests and others which failed with that assert.


I'm too lazy to do it on Tesla, so:

Reviewed-by: Samuel Pitoiset 

Thanks!

On 10/10/2015 11:09 AM, Ilia Mirkin wrote:

We still have to push everything out, might as well kick earlier and
flip pushbufs when we know we'll need it. This resolves some issues with
the new policy of making sure that we always leave a bit of room at the
end for fences.

Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
  src/gallium/drivers/nouveau/nv50/nv50_shader_state.c |  9 ++---
  src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16 +++-
  src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20 +---
  3 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index fdde11f..941555f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
 PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
  }
  while (words) {
-   unsigned nr;
-
-   if (!PUSH_SPACE(push, 16))
-  break;
-   nr = PUSH_AVAIL(push);
-   assert(nr >= 16);
-   nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+   unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
  
+   PUSH_SPACE(push, nr + 3);

 BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
 PUSH_DATA (push, (start << 8) | b);
 BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c 
b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index be51407..9a3fd1e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
 PUSH_DATA (push, 0);
  
 while (count) {

-  unsigned nr;
-
-  if (!PUSH_SPACE(push, 16))
- break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 1);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+  unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
  
BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);

PUSH_DATAp(push, src, nr);
@@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
 nouveau_pushbuf_validate(push);
  
 while (words) {

-  unsigned nr;
-
-  nr = PUSH_AVAIL(push);
-  nr = MIN2(nr - 7, words);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+  unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
  
+  PUSH_SPACE(push, nr + 7);

BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
PUSH_DATAh(push, bo->offset + base);
PUSH_DATA (push, bo->offset + base);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index aaec60a..d459dd6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
 nouveau_pushbuf_validate(push);
  
 while (count) {

-  unsigned nr;
+  unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
  
-  if (!PUSH_SPACE(push, 16))

+  if (!PUSH_SPACE(push, nr + 9))
   break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 9);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
  
BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);

PUSH_DATAh(push, dst->offset + offset);
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
 nouveau_pushbuf_validate(push);
  
 while (count) {

-  unsigned nr;
+  unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));
  
-  if (!PUSH_SPACE(push, 16))

+  if (!PUSH_SPACE(push, nr + 10))
   break;
-  nr = PUSH_AVAIL(push);
-  assert(nr >= 16);
-  nr = MIN2(count, nr - 8);
-  nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
  
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);

PUSH_DATAh(push, dst->offset + offset);
@@ -571,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
 PUSH_DATA (push, bo->offset + base);
  
 while (words) {

-  unsigned nr = PUSH_AVAIL(push);
-  nr = MIN2(nr, words);
-  nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+  unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);
  
PUSH_SPACE(push, nr + 2);

PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);


___
mesa-dev mailing list
mesa

[Mesa-dev] [PATCH 06/16] nvc0: allow to use 8 MP counters on Fermi

2015-10-16 Thread Samuel Pitoiset
On Fermi, we have one domain of 8 MP counters while we have
two domains of 4 MP counters on Kepler.

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 30 +-
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h|  2 +-
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index b810d25..2060662 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -144,7 +144,7 @@ struct nvc0_hw_sm_counter_cfg
 
 struct nvc0_hw_sm_query_cfg
 {
-   struct nvc0_hw_sm_counter_cfg ctr[4];
+   struct nvc0_hw_sm_counter_cfg ctr[8];
uint8_t num_counters;
uint8_t op;
uint8_t norm[2]; /* normalization num,denom */
@@ -418,7 +418,6 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
const struct nvc0_hw_sm_query_cfg *cfg;
unsigned i, c;
-   unsigned num_ab[2] = { 0, 0 };
 
if (screen->base.class_3d >= NVE4_3D_CLASS)
   return nve4_hw_sm_begin_query(nvc0, hq);
@@ -426,17 +425,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
 
/* check if we have enough free counter slots */
-   for (i = 0; i < cfg->num_counters; ++i)
-  num_ab[cfg->ctr[i].sig_dom]++;
-
-   if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
-   screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
+   if (screen->pm.num_hw_sm_active[0] + cfg->num_counters > 8) {
   NOUVEAU_ERR("Not enough free MP counter slots !\n");
   return false;
}
 
-   assert(cfg->num_counters <= 4);
-   PUSH_SPACE(push, 4 * 8 * 6 + 4);
+   assert(cfg->num_counters <= 8);
+   PUSH_SPACE(push, 4 * 8 * 6 + 2);
 
/* set sequence field to 0 (used to check if result is available) */
for (i = 0; i < screen->mp_count; ++i) {
@@ -446,23 +441,21 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
hq->sequence++;
 
for (i = 0; i < cfg->num_counters; ++i) {
-  const unsigned d = cfg->ctr[i].sig_dom;
   unsigned s;
 
-  if (!screen->pm.num_hw_sm_active[d]) {
+  if (!screen->pm.num_hw_sm_active[0]) {
  BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
  PUSH_DATA (push, 0x8000);
   }
-  screen->pm.num_hw_sm_active[d]++;
+  screen->pm.num_hw_sm_active[0]++;
 
-  for (c = d * 4; c < (d * 4 + 4); ++c) {
+  for (c = 0; c < 8; ++c) {
  if (!screen->pm.mp_counter[c]) {
 hsq->ctr[i] = c;
 screen->pm.mp_counter[c] = hsq;
 break;
  }
   }
-  assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
 
   /* configure and reset the counter(s) */
   for (s = 0; s < cfg->ctr[i].num_src; s++) {
@@ -522,7 +515,8 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
/* release counters for this query */
for (c = 0; c < 8; ++c) {
   if (screen->pm.mp_counter[c] == hsq) {
- screen->pm.num_hw_sm_active[c / 4]--;
+ uint8_t d = is_nve4 ? c / 4 : 0; /* only one domain for NVC0:NVE4 */
+ screen->pm.num_hw_sm_active[d]--;
  screen->pm.mp_counter[c] = NULL;
   }
}
@@ -568,7 +562,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
 }
 
 static inline bool
-nvc0_hw_sm_query_read_data(uint32_t count[32][4],
+nvc0_hw_sm_query_read_data(uint32_t count[32][8],
struct nvc0_context *nvc0, bool wait,
struct nvc0_hw_query *hq,
const struct nvc0_hw_sm_query_cfg *cfg,
@@ -594,7 +588,7 @@ nvc0_hw_sm_query_read_data(uint32_t count[32][4],
 }
 
 static inline bool
-nve4_hw_sm_query_read_data(uint32_t count[32][4],
+nve4_hw_sm_query_read_data(uint32_t count[32][8],
struct nvc0_context *nvc0, bool wait,
struct nvc0_hw_query *hq,
const struct nvc0_hw_sm_query_cfg *cfg,
@@ -640,7 +634,7 @@ static boolean
 nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query 
*hq,
 boolean wait, union pipe_query_result *result)
 {
-   uint32_t count[32][4];
+   uint32_t count[32][8];
uint64_t value = 0;
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h
index bab6f34..0ad8a91 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h
@@ -5,7 +5,7 @@
 
 struct nvc0_hw_sm_q

[Mesa-dev] [PATCH 02/16] nvc0: split out begin_query() hook used by MP counters

2015-10-16 Thread Samuel Pitoiset
The way we configure MP performance counters is going to be pretty
different between Fermi and Kepler. Having two separate functions
is much better.

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 108 -
 1 file changed, 84 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 8e2239f..f83966a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -338,16 +338,91 @@ nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, 
struct nvc0_hw_query *hq)
 }
 
 static boolean
+nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
+   const struct nvc0_hw_sm_query_cfg *cfg;
+   unsigned i, c;
+   unsigned num_ab[2] = { 0, 0 };
+
+   cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
+
+   /* check if we have enough free counter slots */
+   for (i = 0; i < cfg->num_counters; ++i)
+  num_ab[cfg->ctr[i].sig_dom]++;
+
+   if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 ||
+   screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) {
+  NOUVEAU_ERR("Not enough free MP counter slots !\n");
+  return false;
+   }
+
+   assert(cfg->num_counters <= 4);
+   PUSH_SPACE(push, 4 * 8 * + 6);
+
+   if (!screen->pm.mp_counters_enabled) {
+  screen->pm.mp_counters_enabled = true;
+  BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
+  PUSH_DATA (push, 0x1fcb);
+   }
+
+   /* set sequence field to 0 (used to check if result is available) */
+   for (i = 0; i < screen->mp_count; ++i)
+  hq->data[i * 10 + 10] = 0;
+   hq->sequence++;
+
+   for (i = 0; i < cfg->num_counters; ++i) {
+  const unsigned d = cfg->ctr[i].sig_dom;
+
+  if (!screen->pm.num_hw_sm_active[d]) {
+ uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
+ if (screen->pm.num_hw_sm_active[!d])
+m |= 1 << (7 + (8 * d));
+ BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
+ PUSH_DATA (push, m);
+  }
+  screen->pm.num_hw_sm_active[d]++;
+
+  for (c = d * 4; c < (d * 4 + 4); ++c) {
+ if (!screen->pm.mp_counter[c]) {
+hsq->ctr[i] = c;
+screen->pm.mp_counter[c] = hsq;
+break;
+ }
+  }
+  assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
+
+  /* configure and reset the counter(s) */
+ if (d == 0)
+BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ else
+BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+   }
+   return true;
+}
+
+static boolean
 nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 {
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-   const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq);
const struct nvc0_hw_sm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
 
+   if (screen->base.class_3d >= NVE4_3D_CLASS)
+  return nve4_hw_sm_begin_query(nvc0, hq);
+
cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
 
/* check if we have enough free counter slots */
@@ -361,7 +436,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
}
 
assert(cfg->num_counters <= 4);
-   PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
+   PUSH_SPACE(push, 4 * 8 * 6 + 6);
 
if (!screen->pm.mp_counters_enabled) {
   screen->pm.mp_counters_enabled = true;
@@ -376,6 +451,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
 
for (i = 0; i < cfg->num_counters; ++i) {
   const unsigned d = cfg->ctr[i].sig_dom;
+  unsigned s;
 
   if (!screen->pm.num_hw_sm_active[d]) {
  uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
@@ -396,31 +472,15 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
   assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
 
   /* configure and reset the counter(s) */
-  if (is_nve4) {
- if (d == 0)
-BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
- else
- 

[Mesa-dev] [PATCH 05/16] nvc0: fix sequence field init for MP counters on Fermi

2015-10-16 Thread Samuel Pitoiset
Sequence fields are located at MP[i] + 0x20 in the buffer object.
This is used to check if result is available for MP[i].

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index f7b49da..b810d25 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -439,8 +439,10 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
PUSH_SPACE(push, 4 * 8 * 6 + 4);
 
/* set sequence field to 0 (used to check if result is available) */
-   for (i = 0; i < screen->mp_count; ++i)
-  hq->data[i * 10 + 10] = 0;
+   for (i = 0; i < screen->mp_count; ++i) {
+  const unsigned b = (0x24 / 4) * i;
+  hq->data[b + 8] = 0;
+   }
hq->sequence++;
 
for (i = 0; i < cfg->num_counters; ++i) {
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/16] nvc0: fix queries which use multiple MP counters on Fermi

2015-10-16 Thread Samuel Pitoiset
Queries which use more than one MP counter were misconfigured and
computing the final result was also wrong because sources need to
be configured on different hardware counters instead.

According to the blob, computing the result is now as follows:

FOR  i..n
val += ctr[i] * pow(2, i)

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 128 +
 1 file changed, 81 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 2060662..99e9073 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -131,7 +131,7 @@ struct nvc0_hw_sm_counter_cfg
uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
uint32_t sig_sel : 8;  /* signal group */
-   uint64_t src_sel;  /* signal selection for up to 6 sources (48 bit) */
+   uint32_t src_sel;  /* signal selection for up to 4 sources */
 };
 
 #define NVC0_COUNTER_OPn_SUM0
@@ -280,44 +280,82 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
0x80001de7ULL
 };
 
-#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { 
{ f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 
24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 
1, 1 } }
+#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s }
+#define _Q(n, c, ...) [NVC0_HW_SM_QUERY_##n] = {  \
+   { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\
+}
 
 static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
 {
-   _Q(ACTIVE_CYCLES,   0x, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(ACTIVE_WARPS,0x, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 
0x54, 0x65),
-   _Q(ATOM_COUNT,  0x, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(BRANCH,  0x, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 
0x00, 0x00),
-   _Q(DIVERGENT_BRANCH,0x, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GLD_REQUEST, 0x, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GRED_COUNT,  0x, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GST_REQUEST, 0x, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_EXECUTED,   0x, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED1_0,  0x, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED1_1,  0x, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED2_0,  0x, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED2_1,  0x, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(LOCAL_LD,0x, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(LOCAL_ST,0x, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_0,  0x, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_1,  0x, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_2,  0x, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_3,  0x, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_4,  0x, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_5,  0x, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_6,  0x, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_7,  0x, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(SHARED_LD,   0x, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(SHARED_ST,   0x, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(THREADS_LAUNCHED,0x, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 
0x54, 0x65),
-   _Q(TH_INST_EXECUTED_0,  0x, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_1,  0x, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_2,  0x, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_3,  0x, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(WARPS_LAUNCHED,  0x, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00),
+   _Q(ACTIVE_CYCLES,   1, _C(0x, LOGOP, 0x11, 0x)),
+   _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x0010),
+  _C(0x, LOGOP, 0x24, 0x0021),
+  _C(0x, LOGOP, 0x24, 0x0032),
+  _C(0

[Mesa-dev] [PATCH 01/16] nvc0: remove useless call to query_get_cfg() in nvc0_hw_sm_query_end()

2015-10-16 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 3bdb90a..8e2239f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -439,9 +439,6 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
const uint grid[3] = { screen->mp_count, 1, 1 };
unsigned c;
-   const struct nvc0_hw_sm_query_cfg *cfg;
-
-   cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq);
 
if (unlikely(!screen->pm.prog)) {
   struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
@@ -495,6 +492,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
PUSH_SPACE(push, 16);
mask = 0;
for (c = 0; c < 8; ++c) {
+  const struct nvc0_hw_sm_query_cfg *cfg;
   unsigned i;
 
   hsq = screen->pm.mp_counter[c];
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/16] nvc0: correctly enable the MP counters' multiplexer on Fermi

2015-10-16 Thread Samuel Pitoiset
Writing 0x408000 to 0x419e00 (like on Kepler) has no effect on Fermi
because we only have one domain of 8 counters. Instead, we have to
write 0x8000.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index a74bfee..f7b49da 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -448,11 +448,8 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
   unsigned s;
 
   if (!screen->pm.num_hw_sm_active[d]) {
- uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
- if (screen->pm.num_hw_sm_active[!d])
-m |= 1 << (7 + (8 * d));
  BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
- PUSH_DATA (push, m);
+ PUSH_DATA (push, 0x8000);
   }
   screen->pm.num_hw_sm_active[d]++;
 
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/16] nvc0: rip off the kepler MP-enabling logic from the Fermi codepath

2015-10-16 Thread Samuel Pitoiset
Writing 0x1fcb to 0x419eac is definitely not related to MP counters and
has no effect on Fermi (although this enables MP counters on Kepler).

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index f83966a..a74bfee 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -436,13 +436,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
}
 
assert(cfg->num_counters <= 4);
-   PUSH_SPACE(push, 4 * 8 * 6 + 6);
-
-   if (!screen->pm.mp_counters_enabled) {
-  screen->pm.mp_counters_enabled = true;
-  BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
-  PUSH_DATA (push, 0x1fcb);
-   }
+   PUSH_SPACE(push, 4 * 8 * 6 + 4);
 
/* set sequence field to 0 (used to check if result is available) */
for (i = 0; i < screen->mp_count; ++i)
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/16] nvc0: improve MP counters support on Fermi

2015-10-16 Thread Samuel Pitoiset
Hello,

This series fixes some issues related to MP performance counters on Fermi.

MP counters for GF100/GF110 have also been improved because they are compute
capability 2.0 while the other Fermi chipsets are 2.1 and some HW events are
different.

Compute support is now enabled by default on Fermi because I can't reproduce
those weird effects on 3D state. This has probably been fixed as a side effect.
Anyway, if someone complains about it, I'll be glad to fix it.

This series has been tested with the following apps:
 - xonotic-glx
 - heaven
 - valley
 - glxgears, glxspheres64 and so on ...

And with the following Fermi chipsets:
 - GF100
 - GF108
 - GF110
 - GF114
 - GF116
 - GF119

Note that with GF100/GF110, some MP counters are not correctly context-switched
and results might be wrong. This is a known issue that we need to fix on the
Nouveau side.

There are no regressions with piglit.

I'll submit another series in the next few days which adds some performance
monitoring metrics on Fermi and I'll double check MP counters on Kepler.

Thanks.

Samuel Pitoiset (16):
  nvc0: remove useless call to query_get_cfg() in nvc0_hw_sm_query_end()
  nvc0: split out begin_query() hook used by MP counters
  nvc0: rip off the kepler MP-enabling logic from the Fermi codepath
  nvc0: correctly enable the MP counters' multiplexer on Fermi
  nvc0: fix sequence field init for MP counters on Fermi
  nvc0: allow to use 8 MP counters on Fermi
  nvc0: fix queries which use multiple MP counters on Fermi
  nvc0: fix monitoring multiple MP counters queries on Fermi
  nvc0: fix unaligned mem access when reading MP counters on Fermi
  nvc0: store the number of GPCs to nvc0_screen
  nvc0: read MP counters of all GPCs on Fermi
  nvc0: allow only one active query for the MP counters group
  nvc0: enable compute support by default on Fermi
  nvc0: move SW/HW queries info to their respective files
  nvc0: add MP counters variants for GF100/GF110
  nvc0: add a note about MP counters on GF100/GF110

 src/gallium/drivers/nouveau/nvc0/nvc0_query.c  | 208 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   |  14 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h   |   3 +
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 809 ++---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h|   7 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c   |  64 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h   |   3 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   8 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h |   1 +
 9 files changed, 834 insertions(+), 283 deletions(-)

-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/16] nvc0: allow only one active query for the MP counters group

2015-10-16 Thread Samuel Pitoiset
Because we can't expose the number of hardware counters needed for each
different query, we don't want to allow more than one active query
simultaneously to avoid failure when the maximum number of counters
is reached. Note that these groups of GPU counters are currently only
used by AMD_performance_monitor.

Like for Kepler, this limits the maximum number of active queries
to 1 on Fermi.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f8d4ba1..c81b85a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -371,22 +371,20 @@ nvc0_screen_get_driver_query_group_info(struct 
pipe_screen *pscreen,
  info->name = "MP counters";
  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
+ /* Because we can't expose the number of hardware counters needed for
+  * each different query, we don't want to allow more than one active
+  * query simultaneously to avoid failure when the maximum number of
+  * counters is reached. Note that these groups of GPU counters are
+  * currently only used by AMD_performance_monitor.
+  */
+ info->max_active_queries = 1;
+
  if (screen->base.class_3d == NVE4_3D_CLASS) {
 info->num_queries = NVE4_HW_SM_QUERY_COUNT;
-
- /* On NVE4+, each multiprocessor have 8 hardware counters 
separated
-  * in two distinct domains, but we allow only one active query
-  * simultaneously because some of them use more than one hardware
-  * counter and this will result in an undefined behaviour. */
- info->max_active_queries = 1; /* TODO: handle multiple hw 
counters */
- return 1;
+return 1;
  } else
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->num_queries = NVC0_HW_SM_QUERY_COUNT;
-
-/* On NVC0:NVE4, each multiprocessor have 8 hardware counters
- * in a single domain. */
-info->max_active_queries = 8;
 return 1;
  }
   }
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/16] nvc0: add a note about MP counters on GF100/GF110

2015-10-16 Thread Samuel Pitoiset
MP counters on GF100/GF110 (compute capability 2.0) are buggy
because there is a context-switch problem that we need to fix.
Results might be wrong sometimes, be careful!

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index c4b40a6..c5ce3e3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -286,6 +286,11 @@ static const struct nvc0_hw_sm_query_cfg 
nve4_hw_sm_queries[] =
 #undef _M2B
 
 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
+/* NOTES:
+ * - MP counters on GF100/GF110 (compute capability 2.0) are buggy
+ *   because there is a context-switch problem that we need to fix.
+ *   Results might be wrong sometimes, be careful!
+ */
 static const char *nvc0_hw_sm_query_names[] =
 {
/* MP counters */
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/16] nvc0: add MP counters variants for GF100/GF110

2015-10-16 Thread Samuel Pitoiset
GF100 and GF110 chipsets are compute capability 2.0, while the other
Fermi chipsets are compute capability 2.1. That's why some MP counters
are different between these chipsets and we need to handle variants.

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 559 ++---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h|   1 +
 2 files changed, 483 insertions(+), 77 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 174beef..c4b40a6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -298,6 +298,7 @@ static const char *nvc0_hw_sm_query_names[] =
"gred_count",
"gst_request",
"inst_executed",
+   "inst_issued",
"inst_issued1_0",
"inst_issued1_1",
"inst_issued2_0",
@@ -373,82 +374,456 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
 };
 
 #define _C(f, o, g, m, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, m, 0, g, s }
-#define _Q(n, c, ...) [NVC0_HW_SM_QUERY_##n] = {  \
-   { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\
-}
+#define _Q(n, c) [NVC0_HW_SM_QUERY_##n] = c
+
+/*  Compute capability 2.0 (GF100/GF110)  */
+static const struct nvc0_hw_sm_query_cfg
+nvc0_active_cycles =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x11, 0x00ff, 0x),
+   .num_counters = 1,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_active_warps =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0010),
+   .ctr[1]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0020),
+   .ctr[2]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0030),
+   .ctr[3]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0040),
+   .ctr[4]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0050),
+   .ctr[5]   = _C(0x, LOGOP, 0x24, 0x00ff, 0x0060),
+   .num_counters = 6,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_atom_count =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x63, 0x00ff, 0x0030),
+   .num_counters = 1,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_branch =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x1a, 0x00ff, 0x),
+   .ctr[1]   = _C(0x, LOGOP, 0x1a, 0x00ff, 0x0010),
+   .num_counters = 2,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_divergent_branch =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x19, 0x00ff, 0x0020),
+   .ctr[1]   = _C(0x, LOGOP, 0x19, 0x00ff, 0x0030),
+   .num_counters = 2,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_gld_request =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x64, 0x00ff, 0x0030),
+   .num_counters = 1,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_gred_count =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x63, 0x00ff, 0x0040),
+   .num_counters = 1,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_gst_request =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x64, 0x00ff, 0x0060),
+   .num_counters = 1,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_inst_executed =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x2d, 0x, 0x1000),
+   .ctr[1]   = _C(0x, LOGOP, 0x2d, 0x, 0x1010),
+   .num_counters = 2,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
+
+static const struct nvc0_hw_sm_query_cfg
+nvc0_inst_issued =
+{
+   .ctr[0]   = _C(0x, LOGOP, 0x27, 0x, 0x7060),
+   .ctr[1]   = _C(0x, LOGOP, 0x27, 0x, 0x7070),
+   .num_counters = 2,
+   .op   = NVC0_COUNTER_OPn_SUM,
+   .norm = { 1, 1 },
+};
 
-static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
-{
-   _Q(ACTIVE_CYCLES,   1, _C(0x, LOGOP, 0x11, 0x00ff, 0x)),
-   _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x00ff, 0x0010),
-  _C(0x, LOGOP, 0x24, 0x00ff, 0x0020),
-  _C(0x, LOGOP, 0x24, 0x00ff, 0x0030),
-  _C(0x, LOGOP, 0x24, 0x00ff, 0x0040),
-  _C(0x, LOGOP, 0x24, 0x00ff, 0x0050),
-  _C(0x, LOGOP, 0x24, 0x00ff, 0x0060)),
-   _Q(ATOM_COUNT,  1, _C(0x, LOGOP, 0x6

[Mesa-dev] [PATCH 11/16] nvc0: read MP counters of all GPCs on Fermi

2015-10-16 Thread Samuel Pitoiset
When a card has more than one GPC, the grid used by the compute
kernel which reads MP performance counters seems to be too small.
The consequence is that the kernel is not launched on all TPCs.

Increasing the grid size using the number of GPCs now launches
enough blocks and we can read MP performance counters of all TPCs.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 840f200..c22ad4b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -528,7 +528,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
-   const uint grid[3] = { screen->mp_count, 1, 1 };
+   const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 };
unsigned c;
 
if (unlikely(!screen->pm.prog)) {
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/16] nvc0: fix monitoring multiple MP counters queries on Fermi

2015-10-16 Thread Samuel Pitoiset
For strange reasons, the signal id depends on the slot selected on Fermi
but not on Kepler. Fortunately, the signal ids are just offset by the
slot id!

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 147 +++--
 1 file changed, 79 insertions(+), 68 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 99e9073..6ee9fa6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -128,7 +128,7 @@ struct nvc0_hw_sm_counter_cfg
 {
uint32_t func: 16; /* mask or 4-bit logic op (depending on mode) */
uint32_t mode: 4;  /* LOGOP,B6,LOGOP_B6(_PULSE) */
-   uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
+   uint32_t src_mask; /* mask for signal selection (only for NVC0:NVE4) */
uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
uint32_t sig_sel : 8;  /* signal group */
uint32_t src_sel;  /* signal selection for up to 4 sources */
@@ -280,78 +280,78 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
0x80001de7ULL
 };
 
-#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s }
+#define _C(f, o, g, m, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, m, 0, g, s }
 #define _Q(n, c, ...) [NVC0_HW_SM_QUERY_##n] = {  \
{ __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\
 }
 
 static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
 {
-   _Q(ACTIVE_CYCLES,   1, _C(0x, LOGOP, 0x11, 0x)),
-   _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x0010),
-  _C(0x, LOGOP, 0x24, 0x0021),
-  _C(0x, LOGOP, 0x24, 0x0032),
-  _C(0x, LOGOP, 0x24, 0x0043),
-  _C(0x, LOGOP, 0x24, 0x0054),
-  _C(0x, LOGOP, 0x24, 0x0065)),
-   _Q(ATOM_COUNT,  1, _C(0x, LOGOP, 0x63, 0x0030)),
-   _Q(BRANCH,  2, _C(0x, LOGOP, 0x1a, 0x),
-  _C(0x, LOGOP, 0x1a, 0x0011)),
-   _Q(DIVERGENT_BRANCH,2, _C(0x, LOGOP, 0x19, 0x0020),
-  _C(0x, LOGOP, 0x19, 0x0031)),
-   _Q(GLD_REQUEST, 1, _C(0x, LOGOP, 0x64, 0x0030)),
-   _Q(GRED_COUNT,  1, _C(0x, LOGOP, 0x63, 0x0040)),
-   _Q(GST_REQUEST, 1, _C(0x, LOGOP, 0x64, 0x0060)),
-   _Q(INST_EXECUTED,   3, _C(0x, LOGOP, 0x2d, 0x),
-  _C(0x, LOGOP, 0x2d, 0x0011),
-  _C(0x, LOGOP, 0x2d, 0x0022)),
-   _Q(INST_ISSUED1_0,  1, _C(0x, LOGOP, 0x7e, 0x0010)),
-   _Q(INST_ISSUED1_1,  1, _C(0x, LOGOP, 0x7e, 0x0040)),
-   _Q(INST_ISSUED2_0,  1, _C(0x, LOGOP, 0x7e, 0x0020)),
-   _Q(INST_ISSUED2_1,  1, _C(0x, LOGOP, 0x7e, 0x0050)),
-   _Q(LOCAL_LD,1, _C(0x, LOGOP, 0x64, 0x0020)),
-   _Q(LOCAL_ST,1, _C(0x, LOGOP, 0x64, 0x0050)),
-   _Q(PROF_TRIGGER_0,  1, _C(0x, LOGOP, 0x01, 0x)),
-   _Q(PROF_TRIGGER_1,  1, _C(0x, LOGOP, 0x01, 0x0010)),
-   _Q(PROF_TRIGGER_2,  1, _C(0x, LOGOP, 0x01, 0x0020)),
-   _Q(PROF_TRIGGER_3,  1, _C(0x, LOGOP, 0x01, 0x0030)),
-   _Q(PROF_TRIGGER_4,  1, _C(0x, LOGOP, 0x01, 0x0040)),
-   _Q(PROF_TRIGGER_5,  1, _C(0x, LOGOP, 0x01, 0x0050)),
-   _Q(PROF_TRIGGER_6,  1, _C(0x, LOGOP, 0x01, 0x0060)),
-   _Q(PROF_TRIGGER_7,  1, _C(0x, LOGOP, 0x01, 0x0070)),
-   _Q(SHARED_LD,   1, _C(0x, LOGOP, 0x64, 0x0010)),
-   _Q(SHARED_ST,   1, _C(0x, LOGOP, 0x64, 0x0040)),
-   _Q(THREADS_LAUNCHED,6, _C(0x, LOGOP, 0x26, 0x0010),
-  _C(0x, LOGOP, 0x26, 0x0021),
-  _C(0x, LOGOP, 0x26, 0x0032),
-  _C(0x, LOGOP, 0x26, 0x0043),
-  _C(0x, LOGOP, 0x26, 0x0054),
-  _C(0x, LOGOP, 0x26, 0x0065)),
-   _Q(TH_INST_EXECUTED_0,  6, _C(0x, LOGOP, 0xa3, 0x),
-  _C(0x, LOGOP, 0xa3, 0x0011),
-  _C(0x, LOGOP, 0xa3, 0x0022),
-  _C(0x, LOGOP, 0xa3, 0x0033),
-  _C(0x, LOGOP, 0xa3, 0x0044),
-  _C(0x, LOGOP, 0xa3, 0x0055)),
-   _Q(TH_INST_EXECUTED_1,  6, _C(0x, LOGOP, 0xa5, 0x),
-  _C(0x, LOGOP, 0xa5, 0x0011),
-  _C(0x, LOGOP, 0xa5, 0x0022

[Mesa-dev] [PATCH 13/16] nvc0: enable compute support by default on Fermi

2015-10-16 Thread Samuel Pitoiset
Compute support was not enabled by default because weird effects
on 3D state happened, but I can't reproduce them anymore.

This also enables MP performance counters by default on Fermi.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c  | 3 +--
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +--
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index c81b85a..80f311b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -291,7 +291,6 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
 count += NVE4_HW_SM_QUERY_COUNT;
  } else
  if (screen->base.class_3d < NVE4_3D_CLASS) {
-/* NVC0_COMPUTE is not always enabled */
 count += NVC0_HW_SM_QUERY_COUNT;
  }
   }
@@ -358,7 +357,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 count++;
  } else
  if (screen->base.class_3d < NVE4_3D_CLASS) {
-count++; /* NVC0_COMPUTE is not always enabled */
+count++;
  }
   }
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ba53d10..d6a4ac5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -561,12 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
switch (screen->base.device->chipset & ~0xf) {
case 0xc0:
case 0xd0:
-  /* Using COMPUTE has weird effects on 3D state, we need to
-   * investigate this further before enabling it by default.
-   */
-  if (debug_get_bool_option("NVC0_COMPUTE", false))
- return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
-  return 0;
+  return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
case 0xe0:
   return nve4_screen_compute_setup(screen, screen->base.pushbuf);
case 0xf0:
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/16] nvc0: fix unaligned mem access when reading MP counters on Fermi

2015-10-16 Thread Samuel Pitoiset
Memory accesses have to be aligned to 128 bits. Note that this
doesn't happen when the card only has one TPC.

This patch fixes the following dmesg fail:

gr: GPC0/TPC1/MP trap: global 0004 [MULTIPLE_WARP_ERRORS] warp 000f
[UNALIGNED_MEM_ACCESS]

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 6ee9fa6..840f200 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -247,7 +247,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
 * mov b32 $r11 c0[0x4]
 * ext u32 $r8 $r9 0x414
 * (not $p0) exit
-* mul $r8 u32 $r8 u32 36
+* mul $r8 u32 $r8 u32 48
 * add b32 $r10 $c $r10 $r8
 * add b32 $r11 $r11 0x0 $c
 * mov b32 $r8 c0[0x8]
@@ -270,7 +270,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
0x280040001002dde4ULL,
0x7000c01050921c03ULL,
0x800021e7ULL,
-   0x100090821c02ULL,
+   0x1000c0821c02ULL,
0x480120a29c03ULL,
0x08b2dc42ULL,
0x2800400020021de4ULL,
@@ -473,7 +473,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct 
nvc0_hw_query *hq)
 
/* set sequence field to 0 (used to check if result is available) */
for (i = 0; i < screen->mp_count; ++i) {
-  const unsigned b = (0x24 / 4) * i;
+  const unsigned b = (0x30 / 4) * i;
   hq->data[b + 8] = 0;
}
hq->sequence++;
@@ -617,7 +617,7 @@ nvc0_hw_sm_query_read_data(uint32_t count[32][8],
unsigned p, c;
 
for (p = 0; p < mp_count; ++p) {
-  const unsigned b = (0x24 / 4) * p;
+  const unsigned b = (0x30 / 4) * p;
 
   for (c = 0; c < cfg->num_counters; ++c) {
  if (hq->data[b + 8] != hq->sequence) {
@@ -815,7 +815,10 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, 
unsigned type)
 */
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
} else {
-  /* for each MP:
+  /*
+   * Note that padding is used to align memory access to 128 bits.
+   *
+   * for each MP:
* [00] = MP.C0
* [04] = MP.C1
* [08] = MP.C2
@@ -825,8 +828,11 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, 
unsigned type)
* [18] = MP.C6
* [1c] = MP.C7
* [20] = MP.sequence
+   * [24] = padding
+   * [28] = padding
+   * [2c] = padding
*/
-  space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
+  space = (8 + 1 + 3) * nvc0->screen->mp_count * sizeof(uint32_t);
}
 
if (!nvc0_hw_query_allocate(nvc0, &hq->base, space)) {
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/16] nvc0: store the number of GPCs to nvc0_screen

2015-10-16 Thread Samuel Pitoiset
NOUVEAU_GETPARAM_GRAPH_UNITS param returns the number of GPCs, the total
number of TPCs and the number of ROP units. Note that when the DRM
version is too old the default number of GPCs is fixed to 4.

This will be used to launch the compute kernel which is used to read MP
performance counters over all GPCs.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index afd91e6..ba53d10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -914,6 +914,7 @@ nvc0_screen_create(struct nouveau_device *dev)
   else
  value = (16 << 8) | 4;
}
+   screen->gpc_count = value & 0x;
screen->mp_count = value >> 8;
screen->mp_count_compute = screen->mp_count;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8cf7560..857eb03 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -67,6 +67,7 @@ struct nvc0_screen {
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
 
+   uint8_t gpc_count;
uint16_t mp_count;
uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
 
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/16] nvc0: move SW/HW queries info to their respective files

2015-10-16 Thread Samuel Pitoiset
This will help for handling HW SM queries variants on Fermi.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c  | 185 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   |  14 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h   |   3 +
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 133 +++
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h|   4 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c   |  64 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h   |   3 +
 7 files changed, 228 insertions(+), 178 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 80f311b..e4752e2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -141,163 +141,19 @@ nvc0_render_condition(struct pipe_context *pipe,
PUSH_DATA (push, hq->bo->offset + hq->offset);
 }
 
-/* === DRIVER STATISTICS === */
-
-#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
-
-static const char *nvc0_sw_query_drv_stat_names[] =
-{
-   "drv-tex_obj_current_count",
-   "drv-tex_obj_current_bytes",
-   "drv-buf_obj_current_count",
-   "drv-buf_obj_current_bytes_vid",
-   "drv-buf_obj_current_bytes_sys",
-   "drv-tex_transfers_rd",
-   "drv-tex_transfers_wr",
-   "drv-tex_copy_count",
-   "drv-tex_blit_count",
-   "drv-tex_cache_flush_count",
-   "drv-buf_transfers_rd",
-   "drv-buf_transfers_wr",
-   "drv-buf_read_bytes_staging_vid",
-   "drv-buf_write_bytes_direct",
-   "drv-buf_write_bytes_staging_vid",
-   "drv-buf_write_bytes_staging_sys",
-   "drv-buf_copy_bytes",
-   "drv-buf_non_kernel_fence_sync_count",
-   "drv-any_non_kernel_fence_sync_count",
-   "drv-query_sync_count",
-   "drv-gpu_serialize_count",
-   "drv-draw_calls_array",
-   "drv-draw_calls_indexed",
-   "drv-draw_calls_fallback_count",
-   "drv-user_buffer_upload_bytes",
-   "drv-constbuf_upload_count",
-   "drv-constbuf_upload_bytes",
-   "drv-pushbuf_count",
-   "drv-resource_validate_count"
-};
-
-#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
-
-/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
-
-/* NOTE: intentionally using the same names as NV */
-static const char *nve4_hw_sm_query_names[] =
-{
-   /* MP counters */
-   "active_cycles",
-   "active_warps",
-   "atom_count",
-   "branch",
-   "divergent_branch",
-   "gld_request",
-   "global_ld_mem_divergence_replays",
-   "global_store_transaction",
-   "global_st_mem_divergence_replays",
-   "gred_count",
-   "gst_request",
-   "inst_executed",
-   "inst_issued",
-   "inst_issued1",
-   "inst_issued2",
-   "l1_global_load_hit",
-   "l1_global_load_miss",
-   "l1_local_load_hit",
-   "l1_local_load_miss",
-   "l1_local_store_hit",
-   "l1_local_store_miss",
-   "l1_shared_load_transactions",
-   "l1_shared_store_transactions",
-   "local_load",
-   "local_load_transactions",
-   "local_store",
-   "local_store_transactions",
-   "prof_trigger_00",
-   "prof_trigger_01",
-   "prof_trigger_02",
-   "prof_trigger_03",
-   "prof_trigger_04",
-   "prof_trigger_05",
-   "prof_trigger_06",
-   "prof_trigger_07",
-   "shared_load",
-   "shared_load_replay",
-   "shared_store",
-   "shared_store_replay",
-   "sm_cta_launched",
-   "threads_launched",
-   "uncached_global_load_transaction",
-   "warps_launched",
-   /* metrics, i.e. functions of the MP counters */
-   "metric-ipc",   /* inst_executed, clock */
-   "metric-ipac",  /* inst_executed, active_cycles */
-   "metric-ipec",  /* inst_executed, (bool)inst_executed */
-   "metric-achieved_occupancy",/* active_warps, active_cycles */
-   "metric-sm_efficiency", /* active_cycles, clock */
-   "metric-inst_replay_overhead"   /* inst_issued, inst_executed */
-};
-
-/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
-static const char *nvc0_hw_sm_query_names[] =
-{
-   /* MP counters */
-   "active_cycles",
-   "active_warps",
-   "atom_count",
-   "branch",
-   "divergent_branch",
-   "gld_request",
-   "gred_count",
-   "gst_request",
-   &

Re: [Mesa-dev] [PATCH 10/16] nvc0: store the number of GPCs to nvc0_screen

2015-10-16 Thread Samuel Pitoiset



On 10/16/2015 07:24 PM, Ilia Mirkin wrote:

On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset
 wrote:

NOUVEAU_GETPARAM_GRAPH_UNITS param returns the number of GPCs, the total
number of TPCs and the number of ROP units. Note that when the DRM
version is too old the default number of GPCs is fixed to 4.

This will be used to launch the compute kernel which is used to read MP
performance counters over all GPCs.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 1 +
  2 files changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index afd91e6..ba53d10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -914,6 +914,7 @@ nvc0_screen_create(struct nouveau_device *dev)
else
   value = (16 << 8) | 4;
 }
+   screen->gpc_count = value & 0x;

gpc_count is a u8... I guess you wanted to make this &0xff? Doesn't
*really* matter in practice, but a bit confusing.


Yes, good catch.




 screen->mp_count = value >> 8;
 screen->mp_count_compute = screen->mp_count;

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8cf7560..857eb03 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -67,6 +67,7 @@ struct nvc0_screen {
 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
 struct nouveau_bo *poly_cache;

+   uint8_t gpc_count;
 uint16_t mp_count;
 uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */

--
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/16] nvc0: fix queries which use multiple MP counters on Fermi

2015-10-16 Thread Samuel Pitoiset



On 10/16/2015 07:32 PM, Ilia Mirkin wrote:

Other than the missing * (1 << c), what was wrong with the old logic?


MP counters were always configured starting from slot 0 to cfg->num_src. 
So, if you monitored two hardware events at the same time, the first one 
was overwritten by the second one.


Now, I check if the slot is free before pushing the configuration 
through the pushbuf.




On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset
 wrote:

Queries which use more than one MP counter were misconfigured and
computing the final result was also wrong because sources need to
be configured on different hardware counters instead.

According to the blob, computing the result is now as follows:

FOR  i..n
val += ctr[i] * pow(2, i)

Signed-off-by: Samuel Pitoiset 
---
  .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 128 +
  1 file changed, 81 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 2060662..99e9073 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -131,7 +131,7 @@ struct nvc0_hw_sm_counter_cfg
 uint32_t num_src : 3;  /* number of sources (1 - 6, only for NVC0:NVE4) */
 uint32_t sig_dom : 1;  /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
 uint32_t sig_sel : 8;  /* signal group */
-   uint64_t src_sel;  /* signal selection for up to 6 sources (48 bit) */
+   uint32_t src_sel;  /* signal selection for up to 4 sources */
  };

  #define NVC0_COUNTER_OPn_SUM0
@@ -280,44 +280,82 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
 0x80001de7ULL
  };

-#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, 
NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL 
<< 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
+#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s }
+#define _Q(n, c, ...) [NVC0_HW_SM_QUERY_##n] = {  \
+   { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\
+}

  static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] =
  {
-   _Q(ACTIVE_CYCLES,   0x, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(ACTIVE_WARPS,0x, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 
0x54, 0x65),
-   _Q(ATOM_COUNT,  0x, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(BRANCH,  0x, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 
0x00, 0x00),
-   _Q(DIVERGENT_BRANCH,0x, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GLD_REQUEST, 0x, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GRED_COUNT,  0x, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(GST_REQUEST, 0x, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_EXECUTED,   0x, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED1_0,  0x, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED1_1,  0x, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED2_0,  0x, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(INST_ISSUED2_1,  0x, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(LOCAL_LD,0x, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(LOCAL_ST,0x, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_0,  0x, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_1,  0x, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_2,  0x, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_3,  0x, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_4,  0x, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_5,  0x, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_6,  0x, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(PROF_TRIGGER_7,  0x, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(SHARED_LD,   0x, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(SHARED_ST,   0x, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 
0x00, 0x00),
-   _Q(THREADS_LAUNCHED,0x, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 
0x54, 0x65),
-   _Q(TH_INST_EXECUTED_0,  0x, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_1,  0x, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_2,  0x, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 
0x44, 0x55),
-   _Q(TH_INST_EXECUTED_3,  0x, LOGOP,

Re: [Mesa-dev] [PATCH 16/16] nvc0: add a note about MP counters on GF100/GF110

2015-10-16 Thread Samuel Pitoiset



On 10/16/2015 07:50 PM, Ilia Mirkin wrote:

Series is Reviewed-by: Ilia Mirkin 

I had a couple of very minor comments that you can feel free to accept
or ignore.


Thank you for this review Ilia, and I think I'll accept all of your 
changes. :)




On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset
 wrote:

MP counters on GF100/GF110 (compute capability 2.0) are buggy
because there is a context-switch problem that we need to fix.
Results might be wrong sometimes, be careful!

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index c4b40a6..c5ce3e3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -286,6 +286,11 @@ static const struct nvc0_hw_sm_query_cfg 
nve4_hw_sm_queries[] =
  #undef _M2B

  /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
+/* NOTES:
+ * - MP counters on GF100/GF110 (compute capability 2.0) are buggy
+ *   because there is a context-switch problem that we need to fix.
+ *   Results might be wrong sometimes, be careful!
+ */
  static const char *nvc0_hw_sm_query_names[] =
  {
 /* MP counters */
--
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi

2015-10-16 Thread Samuel Pitoiset
As explained in the CUDA toolkit documentation, "a metric is a
characteristic of an application that is calculated from one or more
event values."

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   |  19 +-
 .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444 +
 .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h|  42 ++
 4 files changed, 504 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
 create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index edc6cf4..c18e9f5 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
nvc0/nvc0_query.h \
nvc0/nvc0_query_hw.c \
nvc0/nvc0_query_hw.h \
+   nvc0/nvc0_query_hw_metric.c \
+   nvc0/nvc0_query_hw_metric.h \
nvc0/nvc0_query_hw_sm.c \
nvc0/nvc0_query_hw_sm.h \
nvc0/nvc0_query_sw.c \
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 91254be..90ee82f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -25,6 +25,7 @@
 
 #include "nvc0/nvc0_context.h"
 #include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
 #include "nvc0/nvc0_query_hw_sm.h"
 
 #define NVC0_HW_QUERY_STATE_READY   0
@@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned 
type, unsigned index)
   return (struct nvc0_query *)hq;
}
 
+   hq = nvc0_hw_metric_create_query(nvc0, type);
+   if (hq) {
+  hq->base.funcs = &hw_query_funcs;
+  return (struct nvc0_query *)hq;
+   }
+
hq = CALLOC_STRUCT(nvc0_hw_query);
if (!hq)
   return NULL;
@@ -435,14 +442,20 @@ int
 nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
   struct pipe_driver_query_info *info)
 {
-   int num_hw_sm_queries = 0;
+   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
 
num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
+   num_hw_metric_queries =
+  nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);
 
if (!info)
-  return num_hw_sm_queries;
+  return num_hw_sm_queries + num_hw_metric_queries;
+
+   if (id < num_hw_sm_queries)
+  return nvc0_hw_sm_get_driver_query_info(screen, id, info);
 
-   return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+   return nvc0_hw_metric_get_driver_query_info(screen,
+   id - num_hw_sm_queries, info);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 000..dbe350a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+static const char *nvc0_hw_metric_names[] =
+{
+   "metric-achieved_occupancy",
+   "metric-branch_efficiency",
+   "metric-inst_issued",
+   "metric-inst_per_wrap",
+   "metric-inst_replay_overhead",
+   "metric-issued_ipc",
+   "metric-issue_slots",
+   "metric-issue_slot_utilization",
+   "metric-ipc",
+};
+
+struct nvc0_hw_metric_query_cfg {
+   uint

Re: [Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi

2015-10-16 Thread Samuel Pitoiset



On 10/16/2015 11:22 PM, Ilia Mirkin wrote:

On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset
 wrote:

As explained in the CUDA toolkit documentation, "a metric is a
characteristic of an application that is calculated from one or more
event values."

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
  src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   |  19 +-
  .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444 +
  .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h|  42 ++
  4 files changed, 504 insertions(+), 3 deletions(-)
  create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
  create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index edc6cf4..c18e9f5 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
 nvc0/nvc0_query.h \
 nvc0/nvc0_query_hw.c \
 nvc0/nvc0_query_hw.h \
+   nvc0/nvc0_query_hw_metric.c \
+   nvc0/nvc0_query_hw_metric.h \
 nvc0/nvc0_query_hw_sm.c \
 nvc0/nvc0_query_hw_sm.h \
 nvc0/nvc0_query_sw.c \
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 91254be..90ee82f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -25,6 +25,7 @@

  #include "nvc0/nvc0_context.h"
  #include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
  #include "nvc0/nvc0_query_hw_sm.h"

  #define NVC0_HW_QUERY_STATE_READY   0
@@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned 
type, unsigned index)
return (struct nvc0_query *)hq;
 }

+   hq = nvc0_hw_metric_create_query(nvc0, type);
+   if (hq) {
+  hq->base.funcs = &hw_query_funcs;
+  return (struct nvc0_query *)hq;
+   }
+
 hq = CALLOC_STRUCT(nvc0_hw_query);
 if (!hq)
return NULL;
@@ -435,14 +442,20 @@ int
  nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
struct pipe_driver_query_info *info)
  {
-   int num_hw_sm_queries = 0;
+   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;

 num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
+   num_hw_metric_queries =
+  nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);

 if (!info)
-  return num_hw_sm_queries;
+  return num_hw_sm_queries + num_hw_metric_queries;
+
+   if (id < num_hw_sm_queries)
+  return nvc0_hw_sm_get_driver_query_info(screen, id, info);

-   return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+   return nvc0_hw_metric_get_driver_query_info(screen,
+   id - num_hw_sm_queries, info);
  }

  void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 000..dbe350a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+static const char *nvc0_hw_metric_names[] =
+{
+   "metric-achieved_occupancy",
+   "metric-branch_efficiency",
+   "metric-inst_issued",
+   "metric-inst_per_wrap",
+   "metric-inst_replay_overhead",
+   "metric-issued_ipc",
+   "metric-issue_

Re: [Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi

2015-10-16 Thread Samuel Pitoiset



On 10/16/2015 11:57 PM, Ilia Mirkin wrote:

On Fri, Oct 16, 2015 at 5:35 PM, Samuel Pitoiset
 wrote:


On 10/16/2015 11:22 PM, Ilia Mirkin wrote:

On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset
 wrote:

As explained in the CUDA toolkit documentation, "a metric is a
characteristic of an application that is calculated from one or more
event values."

Signed-off-by: Samuel Pitoiset 
---
   src/gallium/drivers/nouveau/Makefile.sources   |   2 +
   src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   |  19 +-
   .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444
+
   .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h|  42 ++
   4 files changed, 504 insertions(+), 3 deletions(-)
   create mode 100644
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
   create mode 100644
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources
b/src/gallium/drivers/nouveau/Makefile.sources
index edc6cf4..c18e9f5 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
  nvc0/nvc0_query.h \
  nvc0/nvc0_query_hw.c \
  nvc0/nvc0_query_hw.h \
+   nvc0/nvc0_query_hw_metric.c \
+   nvc0/nvc0_query_hw_metric.h \
  nvc0/nvc0_query_hw_sm.c \
  nvc0/nvc0_query_hw_sm.h \
  nvc0/nvc0_query_sw.c \
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 91254be..90ee82f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -25,6 +25,7 @@

   #include "nvc0/nvc0_context.h"
   #include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
   #include "nvc0/nvc0_query_hw_sm.h"

   #define NVC0_HW_QUERY_STATE_READY   0
@@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0,
unsigned type, unsigned index)
 return (struct nvc0_query *)hq;
  }

+   hq = nvc0_hw_metric_create_query(nvc0, type);
+   if (hq) {
+  hq->base.funcs = &hw_query_funcs;
+  return (struct nvc0_query *)hq;
+   }
+
  hq = CALLOC_STRUCT(nvc0_hw_query);
  if (!hq)
 return NULL;
@@ -435,14 +442,20 @@ int
   nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
 struct pipe_driver_query_info *info)
   {
-   int num_hw_sm_queries = 0;
+   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;

  num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0,
NULL);
+   num_hw_metric_queries =
+  nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);

  if (!info)
-  return num_hw_sm_queries;
+  return num_hw_sm_queries + num_hw_metric_queries;
+
+   if (id < num_hw_sm_queries)
+  return nvc0_hw_sm_get_driver_query_info(screen, id, info);

-   return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+   return nvc0_hw_metric_get_driver_query_info(screen,
+   id - num_hw_sm_queries,
info);
   }

   void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 000..dbe350a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
a
+ * copy of this software and associated documentation files (the
"Software"),
+ * to deal in the Software without restriction, including without
limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+static const char *nvc0_hw_metric_names[] =
+{
+   "metric-achieved_occupancy",
+   "metric-branch_efficiency",
+   "metric-inst_issued",
+   "metric-inst_

[Mesa-dev] [PATCH] nvc0: do not bind input params at compute state init on Fermi

2015-10-17 Thread Samuel Pitoiset
It looks like binding a constant buffer on compute overwrites the 3D
state. To avoid that, we already re-bind all the 3D constant buffers
after launching a compute grid but this is not enough.

Binding the constant buffer of input parameters for the compute state at
initialization corrupts the 3D constant buffers, and it's just useless
to bind it because this is not needed until we really launch a grid.

This fixes some piglit regressions related to interpolation tests
introduced in "nvc0: enable compute support by default on Fermi".

Fixes: 00d6186 (nvc0: enable compute support by default on Fermi)

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 96d753c..e33af04 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -105,14 +105,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
 
-   /* bind parameters buffer */
-   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
-   PUSH_DATA (push, screen->parm->size);
-   PUSH_DATAh(push, screen->parm->offset);
-   PUSH_DATA (push, screen->parm->offset);
-   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
-   PUSH_DATA (push, (0 << 8) | 1);
-
/* TODO: textures & samplers */
 
return 0;
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: do not bind input params at compute state init on Fermi

2015-10-18 Thread Samuel Pitoiset



On 10/17/2015 08:08 PM, Ilia Mirkin wrote:

This seems surprising... could I convince you to trace a test that
executes both a graphics and compute pipeline, which both use
(different) uniforms?


I'm convinced, because this seems a bit weird to me too.



Anyways, this patch is fine for now, this is Reviewed-by: Ilia Mirkin



Thanks.



On Sat, Oct 17, 2015 at 12:19 PM, Samuel Pitoiset
 wrote:

It looks like binding a constant buffer on compute overwrites the 3D
state. To avoid that, we already re-bind all the 3D constant buffers
after launching a compute grid but this is not enough.

Binding the constant buffer of input parameters for the compute state at
initialization corrupts the 3D constant buffers, and it's just useless
to bind it because this is not needed until we really launch a grid.

This fixes some piglit regressions related to interpolation tests
introduced in "nvc0: enable compute support by default on Fermi".

Fixes: 00d6186 (nvc0: enable compute support by default on Fermi)

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 8 
  1 file changed, 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 96d753c..e33af04 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -105,14 +105,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
 PUSH_DATAh(push, screen->text->offset);
 PUSH_DATA (push, screen->text->offset);

-   /* bind parameters buffer */
-   BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
-   PUSH_DATA (push, screen->parm->size);
-   PUSH_DATAh(push, screen->parm->offset);
-   PUSH_DATA (push, screen->parm->offset);
-   BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
-   PUSH_DATA (push, (0 << 8) | 1);
-
 /* TODO: textures & samplers */

 return 0;
--
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 13/16] nvc0: enable compute support by default on Fermi

2015-10-18 Thread Samuel Pitoiset



On 10/17/2015 09:11 PM, Jan Vesely wrote:

Does this mean it should be possible to hook up clover with nouveau?


As I said to Ilia, this is just the ability to launch compute kernels on Fermi.
Unfortunately, OpenCL is still not supported by Nouveau, but I hope this 
is going to change.




Jan

On Fri, 2015-10-16 at 19:22 +0200, Samuel Pitoiset wrote:

Compute support was not enabled by default because weird effects
on 3D state happened, but I can't reproduce them anymore.

This also enables MP performance counters by default on Fermi.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c  | 3 +--
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +--
  2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index c81b85a..80f311b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -291,7 +291,6 @@ nvc0_screen_get_driver_query_info(struct
pipe_screen *pscreen,
  count += NVE4_HW_SM_QUERY_COUNT;
   } else
   if (screen->base.class_3d < NVE4_3D_CLASS) {
-/* NVC0_COMPUTE is not always enabled */
  count += NVC0_HW_SM_QUERY_COUNT;
   }
}
@@ -358,7 +357,7 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
  count++;
   } else
   if (screen->base.class_3d < NVE4_3D_CLASS) {
-count++; /* NVC0_COMPUTE is not always enabled */
+count++;
   }
}
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ba53d10..d6a4ac5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -561,12 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen
*screen)
 switch (screen->base.device->chipset & ~0xf) {
 case 0xc0:
 case 0xd0:
-  /* Using COMPUTE has weird effects on 3D state, we need to
-   * investigate this further before enabling it by default.
-   */
-  if (debug_get_bool_option("NVC0_COMPUTE", false))
- return nvc0_screen_compute_setup(screen, screen
->base.pushbuf);
-  return 0;
+  return nvc0_screen_compute_setup(screen, screen
->base.pushbuf);
 case 0xe0:
return nve4_screen_compute_setup(screen, screen
->base.pushbuf);
 case 0xf0:


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] nv50: move nva0_so_target_save_offset() to its correct location

2015-10-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query.c | 18 --
 src/gallium/drivers/nouveau/nv50/nv50_query.h |  3 ---
 src/gallium/drivers/nouveau/nv50/nv50_state.c | 18 ++
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 7718d69..1b4abdb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -444,24 +444,6 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, 
uint16_t method,
 }
 
 void
-nva0_so_target_save_offset(struct pipe_context *pipe,
-   struct pipe_stream_output_target *ptarg,
-   unsigned index, bool serialize)
-{
-   struct nv50_so_target *targ = nv50_so_target(ptarg);
-
-   if (serialize) {
-  struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
-  PUSH_SPACE(push, 2);
-  BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
-  PUSH_DATA (push, 0);
-   }
-
-   nv50_query(targ->pq)->index = index;
-   nv50_query_end(pipe, targ->pq);
-}
-
-void
 nv50_init_query_functions(struct nv50_context *nv50)
 {
struct pipe_context *pipe = &nv50->base.pipe;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query.h
index 722af0c..a703013 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -33,8 +33,5 @@ void nv50_init_query_functions(struct nv50_context *);
 void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
struct nv50_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *);
-void nva0_so_target_save_offset(struct pipe_context *,
-struct pipe_stream_output_target *,
-unsigned, bool);
 
 #endif
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 410e631..8af2add 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -1057,6 +1057,24 @@ nv50_so_target_create(struct pipe_context *pipe,
 }
 
 static void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+   struct pipe_stream_output_target *ptarg,
+   unsigned index, bool serialize)
+{
+   struct nv50_so_target *targ = nv50_so_target(ptarg);
+
+   if (serialize) {
+  struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+  PUSH_SPACE(push, 2);
+  BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+  PUSH_DATA (push, 0);
+   }
+
+   nv50_query(targ->pq)->index = index;
+   pipe->end_query(pipe, targ->pq);
+}
+
+static void
 nv50_so_target_destroy(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg)
 {
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/4] nv50: move HW queries to nv50_query_hw.c/h

2015-10-18 Thread Samuel Pitoiset
Hi there,

As for nvc0, this series moves HW queries to improve readability of this
area of the driver and to prepare the way for both MP counters and global
perf counters.

There are no regressions with piglit.

Feel free to review,
Thanks.

Samuel Pitoiset (4):
  nv50: add a header file for nv50_query
  nv50: move nva0_so_target_save_offset() to its correct location
  nv50: move HW queries to nv50_query_hw.c/h files
  nv50: do not create an invalid HW query type

 src/gallium/drivers/nouveau/Makefile.sources   |   3 +
 src/gallium/drivers/nouveau/nv50/nv50_context.h|  12 +-
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 397 ++--
 src/gallium/drivers/nouveau/nv50/nv50_query.h  |  33 ++
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 410 +
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h   |  50 +++
 .../drivers/nouveau/nv50/nv50_shader_state.c   |   7 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c  |  21 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c|   4 +-
 9 files changed, 551 insertions(+), 386 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h

-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nv50: add a header file for nv50_query

2015-10-18 Thread Samuel Pitoiset
As for nvc0, this will allow splitting the different types of queries and
prepare the way for both global performance counters and MP counters.

While we are at it, make use of nv50_query struct instead of pipe_query.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +--
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 29 ++--
 src/gallium/drivers/nouveau/nv50/nv50_query.h  | 40 ++
 .../drivers/nouveau/nv50/nv50_shader_state.c   |  4 +--
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c|  3 +-
 6 files changed, 49 insertions(+), 40 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index c18e9f5..06d9d97 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -73,6 +73,7 @@ NV50_C_SOURCES := \
nv50/nv50_program.h \
nv50/nv50_push.c \
nv50/nv50_query.c \
+   nv50/nv50_query.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 69c1212..fb74a97 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -16,6 +16,7 @@
 #include "nv50/nv50_program.h"
 #include "nv50/nv50_resource.h"
 #include "nv50/nv50_transfer.h"
+#include "nv50/nv50_query.h"
 
 #include "nouveau_context.h"
 #include "nouveau_debug.h"
@@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
 /* nv50_draw.c */
 extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
-/* nv50_query.c */
-void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
-   struct pipe_query *, unsigned result_offset);
-void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
-void nva0_so_target_save_offset(struct pipe_context *,
-struct pipe_stream_output_target *,
-unsigned index, bool seralize);
-
-#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
-
 /* nv50_shader_state.c */
 void nv50_vertprog_validate(struct nv50_context *);
 void nv50_gmtyprog_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 5368ee7..7718d69 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -25,6 +25,7 @@
 #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
 
 #include "nv50/nv50_context.h"
+#include "nv50/nv50_query.h"
 #include "nv_object.xml.h"
 
 #define NV50_QUERY_STATE_READY   0
@@ -39,29 +40,8 @@
  * queries anyway.
  */
 
-struct nv50_query {
-   uint32_t *data;
-   uint16_t type;
-   uint16_t index;
-   uint32_t sequence;
-   struct nouveau_bo *bo;
-   uint32_t base;
-   uint32_t offset; /* base + i * 32 */
-   uint8_t state;
-   bool is64bit;
-   int nesting; /* only used for occlusion queries */
-   struct nouveau_mm_allocation *mm;
-   struct nouveau_fence *fence;
-};
-
 #define NV50_QUERY_ALLOC_SPACE 256
 
-static inline struct nv50_query *
-nv50_query(struct pipe_query *pipe)
-{
-   return (struct nv50_query *)pipe;
-}
-
 static bool
 nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
 {
@@ -363,9 +343,8 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
 }
 
 void
-nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
 {
-   struct nv50_query *q = nv50_query(pq);
unsigned offset = q->offset;
 
PUSH_SPACE(push, 5);
@@ -453,10 +432,8 @@ nv50_render_condition(struct pipe_context *pipe,
 
 void
 nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
-  struct pipe_query *pq, unsigned result_offset)
+  struct nv50_query *q, unsigned result_offset)
 {
-   struct nv50_query *q = nv50_query(pq);
-
nv50_query_update(q);
if (q->state != NV50_QUERY_STATE_READY)
   nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query.h
new file mode 100644
index 000..722af0c
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
@@ -0,0 +1,40 @@
+#ifndef __NV50_QUERY_H__
+#define __NV50_QUERY_H__
+
+#include "pipe/p_context.h"
+
+#include "nouveau_context.h"
+#include "n

[Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type

2015-10-18 Thread Samuel Pitoiset
While we are at it, store the rotate offset for occlusion queries in
nv50_hw_query, as on nvc0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +---
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index fcdd183..6260410 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
nv50_query *q)
 * query might set the initial render condition to false even *after* we re-
 * initialized it to true.
 */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  hq->offset += 32;
-  hq->data += 32 / sizeof(*hq->data);
+   if (hq->rotate) {
+  hq->offset += hq->rotate;
+  hq->data += hq->rotate / sizeof(*hq->data);
   if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
  nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
 
@@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
 {
struct nv50_hw_query *hq;
struct nv50_query *q;
+   unsigned space;
 
hq = CALLOC_STRUCT(nv50_hw_query);
if (!hq)
@@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
q->funcs = &hw_query_funcs;
q->type = type;
 
-   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+  hq->rotate = 32;
+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+  hq->is64bit = true;
+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   default:
+  debug_printf("invalid query type: %u\n", type);
+  FREE(q);
+  return NULL;
+   }
+
+   if (!nv50_hw_query_allocate(nv50, q, space)) {
   FREE(hq);
   return NULL;
}
 
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+   if (hq->rotate) {
   /* we advance before query_begin ! */
-  hq->offset -= 32;
-  hq->data -= 32 / sizeof(*hq->data);
+  hq->offset -= hq->rotate;
+  hq->data -= hq->rotate / sizeof(*hq->data);
}
 
-   hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-
return q;
 }
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index ea2bf24..3a53e8a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -24,9 +24,10 @@ struct nv50_hw_query {
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base_offset;
-   uint32_t offset; /* base + i * 32 */
+   uint32_t offset; /* base + i * rotate */
uint8_t state;
bool is64bit;
+   uint8_t rotate;
int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
-- 
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] nv50: move HW queries to nv50_query_hw.c/h files

2015-10-18 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 354 ++-
 src/gallium/drivers/nouveau/nv50/nv50_query.h  |  26 +-
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 389 +
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h   |  49 +++
 .../drivers/nouveau/nv50/nv50_shader_state.c   |   7 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c  |   3 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c|   5 +-
 8 files changed, 486 insertions(+), 349 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 06d9d97..83f8113 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -74,6 +74,8 @@ NV50_C_SOURCES := \
nv50/nv50_push.c \
nv50/nv50_query.c \
nv50/nv50_query.h \
+   nv50/nv50_query_hw.c \
+   nv50/nv50_query_hw.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1b4abdb..dd9b85b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -26,334 +26,45 @@
 
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_query.h"
-#include "nv_object.xml.h"
-
-#define NV50_QUERY_STATE_READY   0
-#define NV50_QUERY_STATE_ACTIVE  1
-#define NV50_QUERY_STATE_ENDED   2
-#define NV50_QUERY_STATE_FLUSHED 3
-
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
-#define NV50_QUERY_ALLOC_SPACE 256
-
-static bool
-nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
-{
-   struct nv50_screen *screen = nv50->screen;
-   int ret;
-
-   if (q->bo) {
-  nouveau_bo_ref(NULL, &q->bo);
-  if (q->mm) {
- if (q->state == NV50_QUERY_STATE_READY)
-nouveau_mm_free(q->mm);
- else
-nouveau_fence_work(screen->base.fence.current, 
nouveau_mm_free_work,
-   q->mm);
-  }
-   }
-   if (size) {
-  q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, 
&q->base);
-  if (!q->bo)
- return false;
-  q->offset = q->base;
-
-  ret = nouveau_bo_map(q->bo, 0, screen->base.client);
-  if (ret) {
- nv50_query_allocate(nv50, q, 0);
- return false;
-  }
-  q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
-   }
-   return true;
-}
-
-static void
-nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
-{
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
-}
+#include "nv50/nv50_query_hw.h"
 
 static struct pipe_query *
-nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
+nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
 {
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q;
 
-   q = CALLOC_STRUCT(nv50_query);
-   if (!q)
-  return NULL;
-
-   if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
-  FREE(q);
-  return NULL;
-   }
-
-   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-   q->type = type;
-
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  q->offset -= 32;
-  q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
-   }
-
+   q = nv50_hw_create_query(nv50, type, index);
return (struct pipe_query *)q;
 }
 
 static void
-nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
-   unsigned offset, uint32_t get)
+nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
 {
-   offset += q->offset;
-
-   PUSH_SPACE(push, 5);
-   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
-   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
-   PUSH_DATAh(push, q->bo->offset + offset);
-   PUSH_DATA (push, q->bo->offset + offset);
-   PUSH_DATA (push, q->sequence);
-   PUSH_DATA (push, get);
+   struct nv50_query *q = nv50_query(pq);
+   q->funcs->destroy_query(nv50_context(pipe), q);
 }
 
 static boolean
-

Re: [Mesa-dev] [PATCH 1/4] nv50: add a header file for nv50_query

2015-10-19 Thread Samuel Pitoiset



On 10/19/2015 10:43 AM, Pierre Moreau wrote:

Hi Samuel,

(some comments further down)

On 11:30 PM - Oct 18 2015, Samuel Pitoiset wrote:

As for nvc0, this will allow splitting the different types of queries and
prepare the way for both global performance counters and MP counters.

While we are at it, make use of nv50_query struct instead of pipe_query.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/Makefile.sources   |  1 +
  src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +--
  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 29 ++--
  src/gallium/drivers/nouveau/nv50/nv50_query.h  | 40 ++
  .../drivers/nouveau/nv50/nv50_shader_state.c   |  4 +--
  src/gallium/drivers/nouveau/nv50/nv50_vbo.c|  3 +-
  6 files changed, 49 insertions(+), 40 deletions(-)
  create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index c18e9f5..06d9d97 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -73,6 +73,7 @@ NV50_C_SOURCES := \
nv50/nv50_program.h \
nv50/nv50_push.c \
nv50/nv50_query.c \
+   nv50/nv50_query.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 69c1212..fb74a97 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -16,6 +16,7 @@
  #include "nv50/nv50_program.h"
  #include "nv50/nv50_resource.h"
  #include "nv50/nv50_transfer.h"
+#include "nv50/nv50_query.h"
  
  #include "nouveau_context.h"

  #include "nouveau_debug.h"
@@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
  /* nv50_draw.c */
  extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
  
-/* nv50_query.c */

-void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
-   struct pipe_query *, unsigned result_offset);
-void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
-void nva0_so_target_save_offset(struct pipe_context *,
-struct pipe_stream_output_target *,
-unsigned index, bool seralize);
-
-#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
-
  /* nv50_shader_state.c */
  void nv50_vertprog_validate(struct nv50_context *);
  void nv50_gmtyprog_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 5368ee7..7718d69 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -25,6 +25,7 @@
  #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
  
  #include "nv50/nv50_context.h"

+#include "nv50/nv50_query.h"
  #include "nv_object.xml.h"
  
  #define NV50_QUERY_STATE_READY   0

@@ -39,29 +40,8 @@
   * queries anyway.
   */
  
-struct nv50_query {

-   uint32_t *data;
-   uint16_t type;
-   uint16_t index;
-   uint32_t sequence;
-   struct nouveau_bo *bo;
-   uint32_t base;
-   uint32_t offset; /* base + i * 32 */
-   uint8_t state;
-   bool is64bit;
-   int nesting; /* only used for occlusion queries */
-   struct nouveau_mm_allocation *mm;
-   struct nouveau_fence *fence;
-};
-
  #define NV50_QUERY_ALLOC_SPACE 256
  
-static inline struct nv50_query *

-nv50_query(struct pipe_query *pipe)
-{
-   return (struct nv50_query *)pipe;
-}
-
  static bool
  nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
  {
@@ -363,9 +343,8 @@ nv50_query_result(struct pipe_context *pipe, struct 
pipe_query *pq,
  }
  
  void

-nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
  {
-   struct nv50_query *q = nv50_query(pq);
 unsigned offset = q->offset;
  
 PUSH_SPACE(push, 5);

@@ -453,10 +432,8 @@ nv50_render_condition(struct pipe_context *pipe,
  
  void

  nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
-  struct pipe_query *pq, unsigned result_offset)
+  struct nv50_query *q, unsigned result_offset)
  {
-   struct nv50_query *q = nv50_query(pq);
-
 nv50_query_update(q);
 if (q->state != NV50_QUERY_STATE_READY)
nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query.h
new file mode 100644
index 000..722af0c
--- /dev/null
+++ b/src/gallium/drive

Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type

2015-10-19 Thread Samuel Pitoiset



On 10/19/2015 11:01 AM, Pierre Moreau wrote:

Hi Samuel,

(some comments below)

On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote:

While we are at it, store the rotate offset for occlusion queries to
nv50_hw_query like on nvc0.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +---
  src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
  2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index fcdd183..6260410 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
nv50_query *q)
  * query might set the initial render condition to false even *after* we 
re-
  * initialized it to true.
  */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  hq->offset += 32;
-  hq->data += 32 / sizeof(*hq->data);
+   if (hq->rotate) {
+  hq->offset += hq->rotate;
+  hq->data += hq->rotate / sizeof(*hq->data);
if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
   nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
  
@@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)

  {
 struct nv50_hw_query *hq;
 struct nv50_query *q;
+   unsigned space;
  
 hq = CALLOC_STRUCT(nv50_hw_query);

 if (!hq)
@@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
 q->funcs = &hw_query_funcs;
 q->type = type;
  
-   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {

+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+  hq->rotate = 32;

You should have `hq->rotate` default to 0 in other cases, as IIRC, you have no
guarantee about the value of an uninitialised variable.


CALLOC_STRUCT will initialize all fields to 0.




+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+  hq->is64bit = true;

Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`.


+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   default:
+  debug_printf("invalid query type: %u\n", type);
+  FREE(q);
+  return NULL;
+   }
+
+   if (!nv50_hw_query_allocate(nv50, q, space)) {

`space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to
introducing this `space` variable? Do you plan to later add other possible
values to it?


I have a patch locally which reduces the size of that buffer for some 
queries,
but this is not really related to this series. I'll submit it later 
(with other patches).




Pierre



FREE(hq);
return NULL;
 }
  
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {

+   if (hq->rotate) {
/* we advance before query_begin ! */
-  hq->offset -= 32;
-  hq->data -= 32 / sizeof(*hq->data);
+  hq->offset -= hq->rotate;
+  hq->data -= hq->rotate / sizeof(*hq->data);
 }
  
-   hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||

- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-
 return q;
  }
  
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h

index ea2bf24..3a53e8a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -24,9 +24,10 @@ struct nv50_hw_query {
 uint32_t sequence;
 struct nouveau_bo *bo;
 uint32_t base_offset;
-   uint32_t offset; /* base + i * 32 */
+   uint32_t offset; /* base + i * rotate */
 uint8_t state;
 bool is64bit;
+   uint8_t rotate;
 int nesting; /* only used for occlusion queries */
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
--
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type

2015-10-19 Thread Samuel Pitoiset



On 10/19/2015 12:43 PM, Pierre Moreau wrote:

On 11:06 AM - Oct 19 2015, Samuel Pitoiset wrote:


On 10/19/2015 11:01 AM, Pierre Moreau wrote:

Hi Samuel,

(some comments below)

On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote:

While we are at it, store the rotate offset for occlusion queries to
nv50_hw_query like on nvc0.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +---
  src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
  2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index fcdd183..6260410 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
nv50_query *q)
  * query might set the initial render condition to false even *after* we 
re-
  * initialized it to true.
  */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  hq->offset += 32;
-  hq->data += 32 / sizeof(*hq->data);
+   if (hq->rotate) {
+  hq->offset += hq->rotate;
+  hq->data += hq->rotate / sizeof(*hq->data);
if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
   nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
@@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
  {
 struct nv50_hw_query *hq;
 struct nv50_query *q;
+   unsigned space;
 hq = CALLOC_STRUCT(nv50_hw_query);
 if (!hq)
@@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
 q->funcs = &hw_query_funcs;
 q->type = type;
-   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+  hq->rotate = 32;

You should have `hq->rotate` default to 0 in other cases, as IIRC, you have no
guarantee about the value of an uninitialised variable.

CALLOC_STRUCT will initialize all fields to 0.

Oh, that's nice! Didn't know about it.


+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+  hq->is64bit = true;

Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`.


+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+  space = NV50_HW_QUERY_ALLOC_SPACE;
+  break;
+   default:
+  debug_printf("invalid query type: %u\n", type);
+  FREE(q);
+  return NULL;
+   }
+
+   if (!nv50_hw_query_allocate(nv50, q, space)) {

`space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to
introducing this `space` variable? Do you plan to later add other possible
values to it?

I have a patch locally which reduces the size of that buffer for some
queries,
but this is not really related to this series. I'll submit it later (with
other patches).

One could argue then that you should introduce `space` in those later patches.


space was already here, I just kept it :)



Anyway,
Reviewed-by: Pierre Moreau 


Thanks!


Pierre



FREE(hq);
return NULL;
 }
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+   if (hq->rotate) {
/* we advance before query_begin ! */
-  hq->offset -= 32;
-  hq->data -= 32 / sizeof(*hq->data);
+  hq->offset -= hq->rotate;
+  hq->data -= hq->rotate / sizeof(*hq->data);
 }
-   hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-
 return q;
  }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index ea2bf24..3a53e8a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -24,9 +24,10 @@ struct nv50_hw_query {
 uint32_t sequence;
 struct nouveau_bo *bo;
 uint32_t base_offset;
-   uint32_t offset; /* base + i * 32 */
+   uint32_t offset; /* base + i * rotate */
 uint8_t state;
 bool is64bit;
+   uint8_t rotate;
 int nesting; /* only used for occlusion queries */
 struct nouveau_mm_allocation *mm;
 struct nouveau_fence *fence;
--
2.6.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___

Re: [Mesa-dev] [PATCH v2 1/7] nvc0: fix crash when nv50_miptree_from_handle fails

2015-10-25 Thread Samuel Pitoiset



On 10/22/2015 01:02 AM, Julien Isorce wrote:
Sorry this patch should not have gone in the v2 since it has been 
already reviewed by Emil. But thx for your review.
I experienced the crash when testing patch 5/7 of this patch series, 
around "resource = pscreen->resource_from_handle" in the new 
vaCreateSurface2 function. Just passing a wrong fd.


I checked your remark for nv50 and nv30 and they don't perform this step.
From what I can see, nvc0 re-uses nv50_miptree_from_handle from nv50
but still has its own nvc0_miptree_vtbl. But that's just a guess :)


ACK.
Thanks for your answer.

Do you need someone to push this patch?



Cheers
Julien

On 20 October 2015 at 18:04, samuel.pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


Is there a particular situation where nv50_miptree_from_handle()
fails? And did you check nv50?

Anyway, this patch is:
    Reviewed-by: Samuel Pitoiset mailto:samuel.pitoi...@gmail.com>>

On 20/10/2015 18:34, Julien Isorce wrote:

Signed-off-by: Julien Isorce mailto:j.iso...@samsung.com>>
---
  src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
index 12b5a02..15c803c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen
* screen,
 } else {
struct pipe_resource *res =
nv50_miptree_from_handle(screen,
 templ, whandle);
-  nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+  if (res)
+ nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
return res;
 }
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
http://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails

2015-10-25 Thread Samuel Pitoiset



On 10/22/2015 01:16 AM, Julien Isorce wrote:

The real fix is in nouveau_drm_winsys.c by setting dev to 0.
Which means dev's ownership has been passed to previous call.
Other changes are there to be consistent with what the
screen_create functions already do on errors.


This actually happens because nouveau_device_del() is (sometimes) called
twice when nvXX_screen_create() fails.

I don't really like this solution, but I don't have a better one for now.
I'll think about that in the next few days. :)

Note that you forgot to call nouveau_device_del() in nvc0_screen_create().



Encountered this crash because nvc0_screen_create sometimes fails with:
nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: -16
Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354

Signed-off-by: Julien Isorce 
---
  src/gallium/drivers/nouveau/nv30/nv30_screen.c  | 5 -
  src/gallium/drivers/nouveau/nv50/nv50_screen.c  | 4 +++-
  src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++
  3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 0330164..9b8ddac 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev)
 unsigned oclass = 0;
 int ret, i;
  
-   if (!screen)

+   if (!screen) {
+  nouveau_device_del(&dev);
return NULL;
+   }
  
 switch (dev->chipset & 0xf0) {

 case 0x30:
@@ -456,6 +458,7 @@ nv30_screen_create(struct nouveau_device *dev)
  
 if (!oclass) {

NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset);
+  nouveau_device_del(&dev);
FREE(screen);
return NULL;
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index ec51d00..e9604d5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -711,8 +711,10 @@ nv50_screen_create(struct nouveau_device *dev)
 int ret;
  
 screen = CALLOC_STRUCT(nv50_screen);

-   if (!screen)
+   if (!screen) {
+  nouveau_device_del(&dev);
return NULL;
+   }
 pscreen = &screen->base.base;
  
 ret = nouveau_screen_init(&screen->base, dev);

diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c 
b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index c6603e3..bd1d761 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -117,6 +117,8 @@ nouveau_drm_screen_create(int fd)
}
  
  	screen = (struct nouveau_screen*)init(dev);

+   /* Previous init func took ownership of dev */
+   dev = 0;
if (!screen)
goto err;
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/4] nv50: do not create an invalid HW query type

2015-10-28 Thread Samuel Pitoiset
While we are at it, store the rotate offset for occlusion queries to
nv50_hw_query like on nvc0.

Changes since v1:
- remove useless 'space' variable

Signed-off-by: Samuel Pitoiset 
Reviewed-by: Pierre Moreau 
---
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 39 +---
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index fcdd183..945ce7ab 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
nv50_query *q)
 * query might set the initial render condition to false even *after* we re-
 * initialized it to true.
 */
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  hq->offset += 32;
-  hq->data += 32 / sizeof(*hq->data);
+   if (hq->rotate) {
+  hq->offset += hq->rotate;
+  hq->data += hq->rotate / sizeof(*hq->data);
   if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
  nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
 
@@ -339,22 +339,39 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
type, unsigned index)
q->funcs = &hw_query_funcs;
q->type = type;
 
+   switch (q->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+  hq->rotate = 32;
+  break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+  hq->is64bit = true;
+  break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+  break;
+   default:
+  debug_printf("invalid query type: %u\n", type);
+  FREE(q);
+  return NULL;
+   }
+
if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
   FREE(hq);
   return NULL;
}
 
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
+   if (hq->rotate) {
   /* we advance before query_begin ! */
-  hq->offset -= 32;
-  hq->data -= 32 / sizeof(*hq->data);
+  hq->offset -= hq->rotate;
+  hq->data -= hq->rotate / sizeof(*hq->data);
}
 
-   hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-
return q;
 }
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 
b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
index fe518a5..294c67d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
@@ -14,9 +14,10 @@ struct nv50_hw_query {
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base_offset;
-   uint32_t offset; /* base + i * 32 */
+   uint32_t offset; /* base + i * rotate */
uint8_t state;
bool is64bit;
+   uint8_t rotate;
int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/4] nv50: move HW queries to nv50_query_hw.c/h files

2015-10-28 Thread Samuel Pitoiset
Changes since v1:
- remove unused 'nv50_hw_query_funcs' struct

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/Makefile.sources   |   2 +
 src/gallium/drivers/nouveau/nv50/nv50_query.c  | 354 ++-
 src/gallium/drivers/nouveau/nv50/nv50_query.h  |  26 +-
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 389 +
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h   |  39 +++
 .../drivers/nouveau/nv50/nv50_shader_state.c   |   7 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c  |   3 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c|   5 +-
 8 files changed, 476 insertions(+), 349 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
 create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h

diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
b/src/gallium/drivers/nouveau/Makefile.sources
index 06d9d97..83f8113 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -74,6 +74,8 @@ NV50_C_SOURCES := \
nv50/nv50_push.c \
nv50/nv50_query.c \
nv50/nv50_query.h \
+   nv50/nv50_query_hw.c \
+   nv50/nv50_query_hw.h \
nv50/nv50_resource.c \
nv50/nv50_resource.h \
nv50/nv50_screen.c \
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 1b4abdb..dd9b85b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -26,334 +26,45 @@
 
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_query.h"
-#include "nv_object.xml.h"
-
-#define NV50_QUERY_STATE_READY   0
-#define NV50_QUERY_STATE_ACTIVE  1
-#define NV50_QUERY_STATE_ENDED   2
-#define NV50_QUERY_STATE_FLUSHED 3
-
-/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
- * (since we use only a single GPU channel per screen) will not work properly.
- *
- * The first is not that big of an issue because OpenGL does not allow nested
- * queries anyway.
- */
-
-#define NV50_QUERY_ALLOC_SPACE 256
-
-static bool
-nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
-{
-   struct nv50_screen *screen = nv50->screen;
-   int ret;
-
-   if (q->bo) {
-  nouveau_bo_ref(NULL, &q->bo);
-  if (q->mm) {
- if (q->state == NV50_QUERY_STATE_READY)
-nouveau_mm_free(q->mm);
- else
-nouveau_fence_work(screen->base.fence.current, 
nouveau_mm_free_work,
-   q->mm);
-  }
-   }
-   if (size) {
-  q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, 
&q->base);
-  if (!q->bo)
- return false;
-  q->offset = q->base;
-
-  ret = nouveau_bo_map(q->bo, 0, screen->base.client);
-  if (ret) {
- nv50_query_allocate(nv50, q, 0);
- return false;
-  }
-  q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
-   }
-   return true;
-}
-
-static void
-nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
-{
-   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
-   nouveau_fence_ref(NULL, &nv50_query(pq)->fence);
-   FREE(nv50_query(pq));
-}
+#include "nv50/nv50_query_hw.h"
 
 static struct pipe_query *
-nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
+nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
 {
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q;
 
-   q = CALLOC_STRUCT(nv50_query);
-   if (!q)
-  return NULL;
-
-   if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
-  FREE(q);
-  return NULL;
-   }
-
-   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
- type == PIPE_QUERY_PRIMITIVES_EMITTED ||
- type == PIPE_QUERY_SO_STATISTICS ||
- type == PIPE_QUERY_PIPELINE_STATISTICS);
-   q->type = type;
-
-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-  q->offset -= 32;
-  q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
-   }
-
+   q = nv50_hw_create_query(nv50, type, index);
return (struct pipe_query *)q;
 }
 
 static void
-nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
-   unsigned offset, uint32_t get)
+nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
 {
-   offset += q->offset;
-
-   PUSH_SPACE(push, 5);
-   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
-   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
-   PUSH_DATAh(push, q->bo->offset + offset);
-   PUSH_DATA (push, q->bo->offset + offset);
-   PUSH_DATA (push, q->sequence);
-   PUSH_DATA (push, get);
+   struct nv50_query *q = nv50_query(pq);
+   q->fun

[Mesa-dev] [PATCH] nvc0: expose a group of performance metrics on Fermi

2015-10-28 Thread Samuel Pitoiset
This allows monitoring those performance metrics through
GL_AMD_performance_monitor.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c   | 14 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_query.h   |  3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c |  2 +-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index e4752e2..f539210 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -28,6 +28,7 @@
 #include "nvc0/nvc0_query.h"
 #include "nvc0/nvc0_query_sw.h"
 #include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
 #include "nvc0/nvc0_query_hw_sm.h"
 
 static struct pipe_query *
@@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 count++;
  } else
  if (screen->base.class_3d < NVE4_3D_CLASS) {
-count++;
+count += 2;
  }
   }
}
@@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 return 1;
  }
   }
+   } else
+   if (id == NVC0_HW_METRIC_QUERY_GROUP) {
+  if (screen->compute) {
+ if (screen->base.class_3d < NVE4_3D_CLASS) {
+info->name = "Performance metrics";
+info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
+info->max_active_queries = 1;
+info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
+return 1;
+ }
+  }
}
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
index 6883ab6..c46361c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h
@@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe)
  * Driver queries groups:
  */
 #define NVC0_HW_SM_QUERY_GROUP   0
-#define NVC0_SW_QUERY_DRV_STAT_GROUP 1
+#define NVC0_HW_METRIC_QUERY_GROUP   1
+#define NVC0_SW_QUERY_DRV_STAT_GROUP 2
 
 void nvc0_init_query_functions(struct nvc0_context *);
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 25aa09b..fb2806a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen 
*screen, unsigned id,
 id = nvc0_hw_metric_get_next_query_id(queries, id);
 info->name = nvc0_hw_metric_names[id];
 info->query_type = NVC0_HW_METRIC_QUERY(id);
-info->group_id = -1;
+info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
 return 1;
  }
   }
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/7] nvc0: fix crash when nv50_miptree_from_handle fails

2015-10-28 Thread Samuel Pitoiset



On 10/26/2015 01:44 PM, Julien Isorce wrote:



On 25 October 2015 at 21:38, Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:



Do you need someone to push this patch?


Yes please


Pushed.





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails

2015-10-29 Thread Samuel Pitoiset



On 10/27/2015 02:01 PM, samuel.pitoiset wrote:



On 27/10/2015 12:52, Emil Velikov wrote:
On 27 October 2015 at 10:50, samuel.pitoiset 
 wrote:

On 27/10/2015 11:37, Emil Velikov wrote:

On 22 October 2015 at 00:16, Julien Isorce 
wrote:

The real fix is in nouveau_drm_winsys.c by setting dev to 0.
Which means dev's ownership has been passed to previous call.
Other changes are there to be consistent with what the
screen_create functions already do on errors.

Encountered this crash because nvc0_screen_create sometimes fails 
with:
nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: 
-16

Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354

Signed-off-by: Julien Isorce 
---
   src/gallium/drivers/nouveau/nv30/nv30_screen.c  | 5 -
   src/gallium/drivers/nouveau/nv50/nv50_screen.c  | 4 +++-
   src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++
   3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 0330164..9b8ddac 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev)
  unsigned oclass = 0;
  int ret, i;

-   if (!screen)
+   if (!screen) {
+  nouveau_device_del(&dev);
 return NULL;
+   }


Imho having these in screen_create() seems like the wrong 'layer'.
Shouldn't one call nouveau_device_del() from within
nouveau_drm_screen_unref
   and explicitly call the latter if the calloc() (here and in 
nv50/nvc0)

fails ?


We can't do that because nouveau_drm_screen_unref() needs a valid
nouveau_screen
object and in this case it is NULL.


Ouch I was under the impression that we've brought back the concept of
winsys in nouveau with the hash_table patches. Seems like we haven't
:(

If we are to do so (split things just like the radeon/amdgpu winsys)
then we can kill two birds with one stone. The missing device_del() on
calloc failure as well as other error paths in nvxx_screen_create().


Okay, I'll have a look at how radeon/amdgpu split those things.


Well, this doesn't actually seem to be "trivial" to do properly.
This is on my todo list (but not a top priority), so if someone
else wants to send a patch for this stuff, feel free to do it. :)





I agree that it's not really an elegant fix, but we don't really have a
choice actually.
In my opinion, this is not that bad.


I never said it's "bad" just the wrong place for the fix. Or in other
words - if we're to fix things might as well do it properly :-)


Sure, I agree. :)



-Emil




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nvc0: add missing compute parameters required by clover

2015-11-03 Thread Samuel Pitoiset
This fixes crashes with some piglit OpenCL tests.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ea317a5..ccaab44 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,7 +353,8 @@ static int
 nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
-   const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+   struct nvc0_screen *screen = nvc0_screen(pscreen);
+   const uint16_t obj_class = screen->compute->oclass;
 
 #define RET(x) do {  \
if (data) \
@@ -384,6 +385,14 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   RET((uint64_t []) { 4096ul });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
   RET((uint32_t []) { 32u });
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+  RET((uint64_t []) { 1ul << 40 });
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+  RET((uint32_t []) { 0u });
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+  RET((uint32_t []) { screen->mp_count_compute });
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+  RET((uint32_t []) { 512u }); /* FIXME: arbitrary limit */
default:
   return 0;
}
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()

2015-11-03 Thread Samuel Pitoiset
To get the size (in bytes) of a compute parameter, clover first calls
get_compute_param() with a NULL data pointer. The RET() macro is based
on nv50.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 --
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6aa4f0b..ea317a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,45 +353,42 @@ static int
 nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
-   uint64_t *data64 = (uint64_t *)data;
-   uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
 
+#define RET(x) do {  \
+   if (data) \
+  memcpy(data, x, sizeof(x));\
+   return sizeof(x); \
+} while (0)
+
switch (param) {
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-  data64[0] = 3;
-  return 8;
+  RET((uint64_t []) { 3ul });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-  data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fff : 65535;
-  data64[1] = 65535;
-  data64[2] = 65535;
-  return 24;
+  if (obj_class >= NVE4_COMPUTE_CLASS) {
+ RET(((uint64_t []) { 0x7fff, 65535ul, 65535ul }));
+  } else {
+ RET(((uint64_t []) { 65535ul, 65535ul, 65535ul }));
+  }
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-  data64[0] = 1024;
-  data64[1] = 1024;
-  data64[2] = 64;
-  return 24;
+  RET(((uint64_t []) { 1024ul, 1024ul, 64ul }));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-  data64[0] = 1024;
-  return 8;
+  RET((uint64_t []) { 1024ul });
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
-  data64[0] = (uint64_t)1 << 40;
-  return 8;
+  RET((uint64_t []) { 1ul << 40 });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
-  data64[0] = 48 << 10;
-  return 8;
+  RET((uint64_t []) { 48ul << 10 });
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
-  data64[0] = 512 << 10;
-  return 8;
+  RET((uint64_t []) { 512ul << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
-  data64[0] = 4096;
-  return 8;
+  RET((uint64_t []) { 4096ul });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
-  data32[0] = 32;
-  return 4;
+  RET((uint32_t []) { 32u });
default:
   return 0;
}
+
+#undef RET
 }
 
 static void
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()

2015-11-03 Thread Samuel Pitoiset



On 11/03/2015 07:26 PM, Ilia Mirkin wrote:

On Tue, Nov 3, 2015 at 1:35 PM, Samuel Pitoiset
 wrote:

To get the size (in bytes) of a compute parameter, clover first calls
get_compute_param() with a NULL data pointer. The RET() macro is based
on nv50.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 --
  1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6aa4f0b..ea317a5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,45 +353,42 @@ static int
  nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
  {
-   uint64_t *data64 = (uint64_t *)data;
-   uint32_t *data32 = (uint32_t *)data;
 const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;

+#define RET(x) do {  \
+   if (data) \
+  memcpy(data, x, sizeof(x));\
+   return sizeof(x); \
+} while (0)
+
 switch (param) {
 case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-  data64[0] = 3;
-  return 8;
+  RET((uint64_t []) { 3ul });
 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-  data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fff : 65535;
-  data64[1] = 65535;
-  data64[2] = 65535;
-  return 24;
+  if (obj_class >= NVE4_COMPUTE_CLASS) {
+ RET(((uint64_t []) { 0x7fff, 65535ul, 65535ul }));


Why the ul's everywhere? And why not on the 0x7 ?


Based on curro's branch for nv50 compute support, but I assume I can get 
rid of this.





+  } else {
+ RET(((uint64_t []) { 65535ul, 65535ul, 65535ul }));
+  }
 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-  data64[0] = 1024;
-  data64[1] = 1024;
-  data64[2] = 64;
-  return 24;
+  RET(((uint64_t []) { 1024ul, 1024ul, 64ul }));
 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-  data64[0] = 1024;
-  return 8;
+  RET((uint64_t []) { 1024ul });
 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
-  data64[0] = (uint64_t)1 << 40;
-  return 8;
+  RET((uint64_t []) { 1ul << 40 });
 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
-  data64[0] = 48 << 10;
-  return 8;
+  RET((uint64_t []) { 48ul << 10 });
 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
-  data64[0] = 512 << 10;
-  return 8;
+  RET((uint64_t []) { 512ul << 10 });
 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
-  data64[0] = 4096;
-  return 8;
+  RET((uint64_t []) { 4096ul });
 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
-  data32[0] = 32;
-  return 4;
+  RET((uint32_t []) { 32u });
 default:
return 0;
 }
+
+#undef RET
  }

  static void
--
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] nvc0: add missing compute parameters required by clover

2015-11-03 Thread Samuel Pitoiset
This fixes crashes with some piglit OpenCL tests.

Changes since v1:
- get rid of ul suffixes when they are unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 52ce2d5..6ad3980 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,7 +353,8 @@ static int
 nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
-   const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+   struct nvc0_screen *screen = nvc0_screen(pscreen);
+   const uint16_t obj_class = screen->compute->oclass;
 
 #define RET(x) do {  \
if (data) \
@@ -384,6 +385,14 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
   RET((uint32_t []) { 32 });
+   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+  RET((uint64_t []) { 1ULL << 40 });
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+  RET((uint32_t []) { 0 });
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+  RET((uint32_t []) { screen->mp_count_compute });
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+  RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
default:
   return 0;
}
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()

2015-11-03 Thread Samuel Pitoiset
To get the size (in bytes) of a compute parameter, clover first calls
get_compute_param() with a NULL data pointer. The RET() macro is based
on nv50.

Changes since v1:
- get rid of ul suffixes when they are unnecessary

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 --
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6aa4f0b..52ce2d5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -353,45 +353,42 @@ static int
 nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
   enum pipe_compute_cap param, void *data)
 {
-   uint64_t *data64 = (uint64_t *)data;
-   uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
 
+#define RET(x) do {  \
+   if (data) \
+  memcpy(data, x, sizeof(x));\
+   return sizeof(x); \
+} while (0)
+
switch (param) {
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-  data64[0] = 3;
-  return 8;
+  RET((uint64_t []) { 3 });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-  data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fff : 65535;
-  data64[1] = 65535;
-  data64[2] = 65535;
-  return 24;
+  if (obj_class >= NVE4_COMPUTE_CLASS) {
+ RET(((uint64_t []) { 0x7fff, 65535, 65535 }));
+  } else {
+ RET(((uint64_t []) { 65535, 65535, 65535 }));
+  }
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-  data64[0] = 1024;
-  data64[1] = 1024;
-  data64[2] = 64;
-  return 24;
+  RET(((uint64_t []) { 1024, 1024, 64 }));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-  data64[0] = 1024;
-  return 8;
+  RET((uint64_t []) { 1024 });
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
-  data64[0] = (uint64_t)1 << 40;
-  return 8;
+  RET((uint64_t []) { 1ULL << 40 });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
-  data64[0] = 48 << 10;
-  return 8;
+  RET((uint64_t []) { 48 << 10 });
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
-  data64[0] = 512 << 10;
-  return 8;
+  RET((uint64_t []) { 512 << 10 });
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
-  data64[0] = 4096;
-  return 8;
+  RET((uint64_t []) { 4096 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
-  data32[0] = 32;
-  return 4;
+  RET((uint32_t []) { 32 });
default:
   return 0;
}
+
+#undef RET
 }
 
 static void
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/hud: document GALLIUM_HUD_PERIOD in envvars.html.

2015-11-04 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 


On 11/04/2015 06:24 AM, Jimmy Berry wrote:

---
  docs/envvars.html | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/docs/envvars.html b/docs/envvars.html
index bdfe999..173c941 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -179,6 +179,8 @@ Mesa EGL supports different sets of environment variables.  
See the
  GALLIUM_HUD - draws various information on the screen, like framerate,
  cpu load, driver statistics, performance counters, etc.
  Set GALLIUM_HUD=help and run e.g. glxgears for more info.
+GALLIUM_HUD_PERIOD - sets the hud update rate in seconds (float). Use zero
+to update every frame. The default period is 1/2 second.
  GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
  rather than stderr.
  GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] gallium/hud: control visibility at startup and runtime.

2015-11-04 Thread Samuel Pitoiset

Hi Jimmy,

Some comments below.

On 11/04/2015 06:17 AM, Jimmy Berry wrote:

- env GALLIUM_HUD_VISIBLE: control default visibility
- env GALLIUM_HUD_TOGGLE_SIGNAL: toggle visibility via signal
---
Thanks for the feedback.

I believe all the suggested changes have been implemented.

One note, all the logic except for the toggle was already in hud_create() and
not hud_draw().

On the subject of allowing the user to specify the signo to use. It was
suggested in the original thread that using a fixed signal might end up stealing
signals from the parent application. Seems like the user should expect funny
behavior if they set the signal to something like SIGKILL. I am not opposed to a
fixed signo or alternatively providing a default. Something like:

GALLIUM_HUD_TOGGLE_SIGNAL=-1 # (results in SIGUSR1)


  docs/envvars.html   |  6 ++
  src/gallium/auxiliary/hud/hud_context.c | 29 +
  2 files changed, 35 insertions(+)

diff --git a/docs/envvars.html b/docs/envvars.html
index bdfe999..530bbb7 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -179,6 +179,12 @@ Mesa EGL supports different sets of environment variables. 
 See the
  GALLIUM_HUD - draws various information on the screen, like framerate,
  cpu load, driver statistics, performance counters, etc.
  Set GALLIUM_HUD=help and run e.g. glxgears for more info.
+GALLIUM_HUD_VISIBLE - control default visibility, defaults to true.
+GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal.
+Especially useful to toggle hud at specific points of application and
+disable for unencumbered viewing the rest of the time. For example, set
+GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_TOGGLE_SIGNAL to 10 (SIGUSR1).
+Use kill -10 <pid> to toggle the hud as desired.
  GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
  rather than stderr.
  GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment
diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bffbc2f 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -33,6 +33,7 @@
   * Set GALLIUM_HUD=help for more info.
   */

+#include 
  #include 

  #include "hud/hud_context.h"
@@ -51,6 +52,8 @@
  #include "tgsi/tgsi_text.h"
  #include "tgsi/tgsi_dump.h"

+/* controlls the visibility of all hud contexts */


"Control the visibility of all HUD contexts"


+static boolean huds_visible = TRUE;


Maybe, hud_is_hidden or something looks like a better name.



  struct hud_context {
 struct pipe_context *pipe;
@@ -95,6 +98,11 @@ struct hud_context {
 } text, bg, whitelines;
  };

+static void
+signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
+{
+   huds_visible = !huds_visible;
+}

  static void
  hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
@@ -441,6 +449,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
 struct hud_pane *pane;
 struct hud_graph *gr;

+   if (!huds_visible)
+  return;
+
 hud->fb_width = tex->width0;
 hud->fb_height = tex->height0;
 hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
@@ -1125,6 +1136,10 @@ hud_create(struct pipe_context *pipe, struct cso_context 
*cso)
 struct pipe_sampler_view view_templ;
 unsigned i;
 const char *env = debug_get_option("GALLIUM_HUD", NULL);
+   long signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0);
+   boolean sig_handled = FALSE;
+   struct sigaction action;
+   huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE);

 if (!env || !*env)
return NULL;
@@ -1267,6 +1282,20 @@ hud_create(struct pipe_context *pipe, struct cso_context 
*cso)

 LIST_INITHEAD(&hud->pane_list);

+   /* setup sig handler once for all hud contexts */
+   if (!sig_handled) {
+  memset(&action, 0, sizeof(action));


I think you can get rid of this memset() by doing 'struct sigaction 
action = {};' above.



+  action.sa_sigaction = &signal_visible_handler;
+  action.sa_flags = SA_SIGINFO;
+
+  if (signo < 1 || signo >= NSIG)
+ fprintf(stderr, "gallium_hud: invalid signal %ld\n", signo);
+  else if (sigaction(signo, &action, NULL) < 0)
+ fprintf(stderr, "gallium_hud: unable to set handler for signal 
%ld\n", signo);
+
+  sig_handled = TRUE;
+   }
+
 hud_parse_env_var(hud, env);
 return hud;
  }



--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0: enable compute support on Fermi

2015-11-05 Thread Samuel Pitoiset
Although the compute support is still not complete because textures and
surfaces need to be implemented, it allows launching very simple compute
kernels, like one which reads MP performance counters.

This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7d96977..5b7b39b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
-  return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+  return 1;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
   return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
 
@@ -245,8 +245,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, 
unsigned shader,
  return 0;
   break;
case PIPE_SHADER_COMPUTE:
-  if (class_3d != NVE4_3D_CLASS)
- return 0;
   break;
default:
   return 0;
-- 
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: enable compute support on Fermi

2015-11-05 Thread Samuel Pitoiset



On 11/06/2015 12:43 AM, Ilia Mirkin wrote:

On Thu, Nov 5, 2015 at 6:41 PM, Samuel Pitoiset
 wrote:

Although the compute support is still not complete because textures and
surfaces need to be implemented, it allows launching very simple compute
kernels, like one which reads MP performance counters.


Didn't those end up breaking 3d rendering? Have you figured out what
was overwriting what?


This doesn't break any stuff related to 3D rendering. The compute kernel 
for reading perf counters has been tested a lot on different chips.


The compute support is already enabled on Kepler and it doesn't seem to 
break 3D rendering, btw.


In the series which fixed those perf counters, I actually introduced a 
bug which has been fixed since: fc5ae0c13f71f049065b1422c20491d2264ae164






This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7d96977..5b7b39b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
 case PIPE_CAP_COMPUTE:
-  return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+  return 1;


Of course this also enables it for NVF0_3D_CLASS. Pretty sure compute
doesn't work there for some dumb reason (like we're missing some in
our ctxsw fw...)


 case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;

@@ -245,8 +245,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, 
unsigned shader,
   return 0;
break;
 case PIPE_SHADER_COMPUTE:
-  if (class_3d != NVE4_3D_CLASS)
- return 0;
break;
 default:
return 0;
--
2.5.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: enable compute support on Fermi

2015-11-06 Thread Samuel Pitoiset



On 11/06/2015 11:23 AM, Hans de Goede wrote:

Hi,

On 06-11-15 00:51, Samuel Pitoiset wrote:



On 11/06/2015 12:43 AM, Ilia Mirkin wrote:

On Thu, Nov 5, 2015 at 6:41 PM, Samuel Pitoiset
 wrote:

Although the compute support is still not complete because textures and
surfaces need to be implemented, it allows launching very simple compute
kernels, like one which reads MP performance counters.


Didn't those end up breaking 3d rendering? Have you figured out what
was overwriting what?


This doesn't break any stuff related to 3D rendering. The compute
kernel for reading perf counters has been tested a lot on different
chips.

The compute support is already enabled on Kepler and it doesn't seem
to break 3D rendering, btw.

In the series which fixed those perf counters, I actually introduced a
bug which has been fixed since: fc5ae0c13f71f049065b1422c20491d2264ae164





This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7d96977..5b7b39b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen
*pscreen, enum pipe_cap param)
 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
 case PIPE_CAP_COMPUTE:
-  return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+  return 1;


Of course this also enables it for NVF0_3D_CLASS. Pretty sure compute
doesn't work there for some dumb reason (like we're missing some in
our ctxsw fw...)


Hmm, my only compute capable card actually is a nvf0 card (gk208 based).
Can you
provide some quick test instructions how I can test compute on that card
(with the patch from this thread applied) ?

And if it does not work, any suggestions how to go about debugging this ?
Or better any info I can provide to help you debug this :)



Unfortunately, compute is currently only supported on Fermi and Kepler
(< GK110).


I could have a look and implement it for your card but since I don't 
have this chipset, this is not going to be easy.


Anyway, the first step is to trace what the blob does using
valgrind-mmt. Basically, tracing the vectorAdd CUDA sample should do the job.


http://nouveau.freedesktop.org/wiki/Valgrind-mmt/

Once it's done, please send me the MMT trace.

But the quickest way for you to test that compute support would be to 
have a chip < GK110 :-)



Regards,

Hans


--
-Samuel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support

2015-11-09 Thread Samuel Pitoiset



On 11/09/2015 07:40 PM, Ilia Mirkin wrote:

Signed-off-by: Ilia Mirkin 
---
  docs/GL3.txt|  2 +-
  docs/relnotes/11.1.0.html   |  1 +
  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  |  2 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82 +
  4 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7abdcd8..da0ffca 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40:

GL_MAX_VERTEX_ATTRIB_STRIDE  DONE (all drivers)
GL_ARB_buffer_storageDONE (i965, nv50, 
nvc0, r600, radeonsi)
-  GL_ARB_clear_texture DONE (i965) (gallium - 
in progress, VMware)
+  GL_ARB_clear_texture DONE (i965, nvc0)
GL_ARB_enhanced_layouts  in progress (Timothy)
- compile-time constant expressions  in progress
- explicit byte offsets for blocks   in progress
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 11fbdff..33fd0b8 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with 
certain drivers.
  
  GL_ARB_arrays_of_arrays on i965
  GL_ARB_blend_func_extended on freedreno (a3xx)
+GL_ARB_clear_texture on nvc0
  GL_ARB_copy_image on nv50, nvc0, radeonsi
  GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
  GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f2e3bf0..fbeec7f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
 case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
 case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
return 1;
 case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
 case PIPE_CAP_VERTEXID_NOBASE:
 case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_CLEAR_TEXTURE:
return 0;

 case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 5f47bad..3ae9943 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe,
PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
PUSH_DATA(push, mt->layer_stride >> 2);
PUSH_DATA(push, dst->u.tex.first_layer);
+
+  IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
 } else {
if (res->base.target == PIPE_BUFFER) {
   PUSH_DATA(push, 262144);
@@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
 PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
 BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
 PUSH_DATA (push, dst->u.tex.first_layer);
+   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);

 BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
 for (z = 0; z < sf->depth; ++z) {
@@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
 nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
  }

+static void
+nvc0_clear_texture(struct pipe_context *pipe,
+   struct pipe_resource *res,
+   unsigned level,
+   const struct pipe_box *box,
+   const void *data)
+{
+   struct nv50_miptree *mt = nv50_miptree(res);
+   struct nv50_surface sf = {{{0}}};


I'm just curious about this: is '= {}' not enough?


+
+   assert(res->target != PIPE_BUFFER);
+
+   sf.base.texture = res;
+   sf.base.format = res->format;
+   sf.base.u.tex.first_layer = box->z;
+   sf.base.u.tex.last_layer = box->depth;
+   sf.base.u.tex.level = level;
+   sf.base.width = sf.width = res->width0 << mt->ms_x;
+   sf.base.height = sf.height = res->height0 << mt->ms_y;
+   sf.depth = box->depth;
+   sf.offset = mt->level[level].offset;
+
+   if (util_format_is_depth_or_stencil(res->format)) {
+  float depth = 0;
+  uint8_t stencil = 0;
+  unsigned clear = 0;
+  const struct util_format_description *desc =
+ util_format_description(res->format);
+
+  if (util_format_has_depth(desc)) {
+ clear |= PIPE_CLEAR_DEPTH;
+ desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+  }
+  if (util_format_has_stencil(desc)) {
+ clear |= PIPE_CLEAR_STENCIL;
+   

Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support

2015-11-09 Thread Samuel Pitoiset



On 11/09/2015 09:03 PM, Ilia Mirkin wrote:

On Mon, Nov 9, 2015 at 2:58 PM, Samuel Pitoiset
 wrote:



On 11/09/2015 07:40 PM, Ilia Mirkin wrote:


Signed-off-by: Ilia Mirkin 
---
   docs/GL3.txt|  2 +-
   docs/relnotes/11.1.0.html   |  1 +
   src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  |  2 +-
   src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82
+
   4 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7abdcd8..da0ffca 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40:

 GL_MAX_VERTEX_ATTRIB_STRIDE  DONE (all
drivers)
 GL_ARB_buffer_storageDONE (i965, nv50,
nvc0, r600, radeonsi)
-  GL_ARB_clear_texture DONE (i965)
(gallium - in progress, VMware)
+  GL_ARB_clear_texture DONE (i965, nvc0)
 GL_ARB_enhanced_layouts  in progress
(Timothy)
 - compile-time constant expressions  in progress
 - explicit byte offsets for blocks   in progress
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 11fbdff..33fd0b8 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with
certain drivers.
   
   GL_ARB_arrays_of_arrays on i965
   GL_ARB_blend_func_extended on freedreno (a3xx)
+GL_ARB_clear_texture on nvc0
   GL_ARB_copy_image on nv50, nvc0, radeonsi
   GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips
   GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f2e3bf0..fbeec7f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
enum pipe_cap param)
  case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
  case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
  case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
 return 1;
  case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
 return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
enum pipe_cap param)
  case PIPE_CAP_VERTEXID_NOBASE:
  case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
  case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_CLEAR_TEXTURE:
 return 0;

  case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 5f47bad..3ae9943 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe,
 PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
 PUSH_DATA(push, mt->layer_stride >> 2);
 PUSH_DATA(push, dst->u.tex.first_layer);
+
+  IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
  } else {
 if (res->base.target == PIPE_BUFFER) {
PUSH_DATA(push, 262144);
@@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
  PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
  BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
  PUSH_DATA (push, dst->u.tex.first_layer);
+   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);

  BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
  for (z = 0; z < sf->depth; ++z) {
@@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
  nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
   }

+static void
+nvc0_clear_texture(struct pipe_context *pipe,
+   struct pipe_resource *res,
+   unsigned level,
+   const struct pipe_box *box,
+   const void *data)
+{
+   struct nv50_miptree *mt = nv50_miptree(res);
+   struct nv50_surface sf = {{{0}}};



I'm just curious about this: is '= {}' not enough?


I wanted to be *really* sure it got initialized... figured 3 sets was enough :)

But seriously -- allegedly some compilers don't like that. I can't be
bothered to check on the actual situation, so I'm including the 0 in
there. And gcc wanted more {} since the first field was a struct whose
first field was a struct, etc.


Okay, I think it's *really* initialized. :)
But maybe, a memset() call could be better (really doesn't matter).

Anyway, I'm not familiar enough with that part of the driver to add a Rb,
but it looks fine. I hope you did a full piglit run this time. ;)




   -ilia



Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support

2015-11-09 Thread Samuel Pitoiset



On 11/09/2015 09:14 PM, Ilia Mirkin wrote:

On Mon, Nov 9, 2015 at 3:13 PM, Samuel Pitoiset
 wrote:



On 11/09/2015 09:03 PM, Ilia Mirkin wrote:


On Mon, Nov 9, 2015 at 2:58 PM, Samuel Pitoiset
 wrote:




On 11/09/2015 07:40 PM, Ilia Mirkin wrote:



Signed-off-by: Ilia Mirkin 
---
docs/GL3.txt|  2 +-
docs/relnotes/11.1.0.html   |  1 +
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  |  2 +-
src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82
+
4 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7abdcd8..da0ffca 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40:

  GL_MAX_VERTEX_ATTRIB_STRIDE  DONE (all
drivers)
  GL_ARB_buffer_storageDONE (i965,
nv50,
nvc0, r600, radeonsi)
-  GL_ARB_clear_texture DONE (i965)
(gallium - in progress, VMware)
+  GL_ARB_clear_texture DONE (i965,
nvc0)
  GL_ARB_enhanced_layouts  in progress
(Timothy)
  - compile-time constant expressions  in progress
  - explicit byte offsets for blocks   in progress
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 11fbdff..33fd0b8 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with
certain drivers.

GL_ARB_arrays_of_arrays on i965
GL_ARB_blend_func_extended on freedreno (a3xx)
+GL_ARB_clear_texture on nvc0
GL_ARB_copy_image on nv50, nvc0, radeonsi
GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba
chips
GL_ARB_gpu_shader5 on r600 for Evergreen and later chips
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f2e3bf0..fbeec7f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
enum pipe_cap param)
   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
   case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
  return 1;
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
  return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen,
enum pipe_cap param)
   case PIPE_CAP_VERTEXID_NOBASE:
   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_CLEAR_TEXTURE:
  return 0;

   case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 5f47bad..3ae9943 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe,
  PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
  PUSH_DATA(push, mt->layer_stride >> 2);
  PUSH_DATA(push, dst->u.tex.first_layer);
+
+  IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
   } else {
  if (res->base.target == PIPE_BUFFER) {
 PUSH_DATA(push, 262144);
@@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
   PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer +
sf->depth));
   BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
   PUSH_DATA (push, dst->u.tex.first_layer);
+   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);

   BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
   for (z = 0; z < sf->depth; ++z) {
@@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
   nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}

+static void
+nvc0_clear_texture(struct pipe_context *pipe,
+   struct pipe_resource *res,
+   unsigned level,
+   const struct pipe_box *box,
+   const void *data)
+{
+   struct nv50_miptree *mt = nv50_miptree(res);
+   struct nv50_surface sf = {{{0}}};




I'm just curious about this: isn't '= {}' enough?



I wanted to be *really* sure it got initialized... figured 3 sets was
enough :)

But seriously -- allegedly some compilers don't like that. I can't be
bothered to check on the actual situation, so I'm including the 0 in
there. And gcc wanted more {} since the first field was a struct whose
first field was a struct, etc.



Okay, I think it's *really* initialized. :)
But maybe, a memset() call could be better (really doesn't matter).


Should be functionally equivalent, but less verbose.

Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails

2015-11-12 Thread Samuel Pitoiset

Hi Emil,

On 11/10/2015 04:35 PM, Emil Velikov wrote:

Hi Samuel,

Sorry about this I thought I already replied :-\

On 29 October 2015 at 22:22, Samuel Pitoiset  wrote:

On 10/27/2015 02:01 PM, samuel.pitoiset wrote:

On 27/10/2015 12:52, Emil Velikov wrote:


On 27 October 2015 at 10:50, samuel.pitoiset 
wrote:


On 27/10/2015 11:37, Emil Velikov wrote:


On 22 October 2015 at 00:16, Julien Isorce 
wrote:


The real fix is in nouveau_drm_winsys.c by setting dev to 0.
Which means dev's ownership has been passed to previous call.
Other changes are there to be consistent with what the
screen_create functions already do on errors.

Encountered this crash because nvc0_screen_create sometimes fails
with:
nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: -16
Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354

Signed-off-by: Julien Isorce 
---
src/gallium/drivers/nouveau/nv30/nv30_screen.c  | 5 -
src/gallium/drivers/nouveau/nv50/nv50_screen.c  | 4 +++-
src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++
3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 0330164..9b8ddac 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev)
   unsigned oclass = 0;
   int ret, i;

-   if (!screen)
+   if (!screen) {
+  nouveau_device_del(&dev);
  return NULL;
+   }


Imho having these in screen_create() seems like the wrong 'layer'.
Shouldn't one call nouveau_device_del() from within
nouveau_drm_screen_unref
and explicitly call the latter if the calloc() (here and in
nv50/nvc0)
fails ?



We can't do that because nouveau_drm_screen_unref() needs a valid
nouveau_screen
object and in this case it is NULL.


Ouch I was under the impression that we've brought back the concept of
winsys in nouveau with the hash_table patches. Seems like we haven't
:(

If we are to do so (split things just like the radeon/amdgpu winsys)
then we can kill two birds with one stone. The missing device_del() on
calloc failure as well as other error paths in nvxx_screen_create().



Okay, I'll have a look at how radeon/amdgpu split those things.



Well, this doesn't seem to be "trivial" to do properly, actually.
This is on my todo list (but not as a top priority), so if someone
else wants to send a patch for this stuff, feel free to do it. :)


On the contrary - it's pretty trivial: 99% of the work is either code
movement or a sed job.
On the other hand, it might not turn out to be stable material
(rather large diff). So please add a comment or two (something
resembling my suggestion) and feel free to push it.

Roughly how many things do you have in your mesa todo list prior to
nouveau_winsys ?


Lot of things, mostly related to performance counters! ;)
Fixing a segfault when something else has failed doesn't sound like a
top priority to me. But... I agree this should be fixed, I'll have
a look this month.




Cheers,
Emil


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   4   5   6   7   8   9   10   >