[Mesa-dev] [RFC PATCH 8/8] nv50: enable GL_AMD_performance_monitor
This exposes a group of global performance counters that enables GL_AMD_performance_monitor. All piglit tests are okay. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 35 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 + 3 files changed, 42 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 062d427..6638e82 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -1566,6 +1566,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, info->name = cfg->event->name; info->query_type = NV50_HW_PM_QUERY(id); + info->group_id = NV50_HW_PM_QUERY_GROUP; info->max_value.u64 = (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0; return 1; @@ -1576,6 +1577,40 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, return 0; } +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, +unsigned id, +struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + // TODO: Check DRM version when nvif will be merged in libdrm! + if (screen->base.perfmon) { + count++; /* NV50_HW_PM_QUERY_GROUP */ + } + + if (!info) + return count; + + if (id == NV50_HW_PM_QUERY_GROUP) { + if (screen->base.perfmon) { + info->name = "Global performance counters"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + info->num_queries = NV50_HW_PM_QUERY_COUNT; + info->max_active_queries = 1; /* TODO: get rid of this limitation! 
*/ + return 1; + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + info->type = 0; + return 0; +} + void nv50_init_query_functions(struct nv50_context *nv50) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index f07798e..dfe20c9 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -746,6 +746,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69127c0..807ae0e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +/* Hardware global performance counters groups. */ +#define NV50_HW_PM_QUERY_GROUP 0 + /* Hardware global performance counters. */ #define NV50_HW_PM_QUERY_COUNT 24 #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i)) @@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen) int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, +struct pipe_driver_query_group_info *); + boolean nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions
This may happen when nv50_query_create() fails to create a new query. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 55fcac8..1162110 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -96,6 +96,9 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { + if (!pq) + return; + nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); nouveau_fence_ref(NULL, &nv50_query(pq)->fence); FREE(nv50_query(pq)); @@ -152,6 +155,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + if (!pq) + return FALSE; + /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- * initialized it to TRUE. 
@@ -218,6 +224,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + if (!pq) + return; + q->state = NV50_QUERY_STATE_ENDED; switch (q->type) { @@ -294,9 +303,12 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, uint64_t *res64 = (uint64_t *)result; uint32_t *res32 = (uint32_t *)result; boolean *res8 = (boolean *)result; - uint64_t *data64 = (uint64_t *)q->data; + uint64_t *data64; int i; + if (!pq) + return FALSE; + if (q->state != NV50_QUERY_STATE_READY) nv50_query_update(q); @@ -314,6 +326,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, } q->state = NV50_QUERY_STATE_READY; + data64 = (uint64_t *)q->data; switch (q->type) { case PIPE_QUERY_GPU_FINISHED: res8[0] = TRUE; -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 2/8] nv50: allocate a software object class
This will allow to monitor global performance counters through the command stream of the GPU instead of using ioctls. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 1 + 3 files changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6583a35..c985344 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); + nouveau_object_del(&screen->sw); nouveau_screen_fini(&screen->base); @@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->tesla->handle); + BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->sw->handle); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); @@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, +NULL, 0, &screen->sw); + if (ret) { + NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret); + goto fail; + } + ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, NULL, 0, &screen->m2mf); if (ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 881051b..69fdfdb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -93,6 +93,7 @@ struct nv50_screen { struct nouveau_object *tesla; struct nouveau_object *eng2d; struct nouveau_object *m2mf; + struct nouveau_object *sw; }; static INLINE struct nv50_screen * diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index e8578c8..5cb33ef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_COMPUTE(m) 6, (m) #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) +#define SUBC_SW(m) 7, (m) static INLINE uint32_t NV50_FIFO_PKHDR(int subc, int mthd, unsigned size) -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 3/8] nv50: allocate and map a notifier buffer object for PM
This notifier buffer object will be used to read back global performance counters results written by the kernel. For each domain, we will store the handle of the perfdom object, an array of 4 counters and the number of cycles. Like the Gallium's HUD, we keep a list of busy queries in a ring in order to prevent stalls when reading queries. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 29 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 ++ 2 files changed, 35 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index c985344..3a99cc8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); nouveau_object_del(&screen->sw); + nouveau_object_del(&screen->query); nouveau_screen_fini(&screen->base); @@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev) struct nv50_screen *screen; struct pipe_screen *pscreen; struct nouveau_object *chan; + struct nv04_fifo *fifo; uint64_t value; uint32_t tesla_class; unsigned stack_size; + uint32_t length; int ret; screen = CALLOC_STRUCT(nv50_screen); @@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev) screen->base.pushbuf->rsvd_kick = 5; chan = screen->base.channel; + fifo = chan->data; pscreen->destroy = nv50_screen_destroy; pscreen->context_create = nv50_create; @@ -772,6 +776,23 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + /* Compute size (in bytes) of the notifier buffer object which is used +* in order to read back global performance counters results written +* by the kernel. For each domain, we store the handle of the perfdom +* object, an array of 4 counters and the number of cycles. 
Like for +* the Gallium's HUD, we keep a list of busy queries in a ring in order +* to prevent stalls when reading queries. */ + length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) * + NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4; + + ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS, +&(struct nv04_notify){ .length = length }, +sizeof(struct nv04_notify), &screen->query); + if (ret) { + NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret); + goto fail; + } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, NULL, 0, &screen->sw); if (ret) { @@ -845,6 +866,14 @@ nv50_screen_create(struct nouveau_device *dev) nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify_bo); + if (ret == 0) + nouveau_bo_map(screen->notify_bo, 0, screen->base.client); + if (ret) { + NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret); + goto fail; + } + nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); screen->TPs = util_bitcount(value & 0x); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69fdfdb..71a5247 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -59,6 +59,7 @@ struct nv50_screen { struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *stack_bo; struct nouveau_bo *tls_bo; + struct nouveau_bo *notify_bo; unsigned TPs; unsigned MPsInTP; @@ -89,6 +90,7 @@ struct nv50_screen { } fence; struct nouveau_object *sync; + struct nouveau_object *query; struct nouveau_object *tesla; struct nouveau_object *eng2d; @@ -96,6 +98,10 @@ struct nv50_screen { struct nouveau_object *sw; }; +/* Parameters of the ring buffer used to read back global PM counters. 
*/ +#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS 8 +#define NV50_HW_PM_RING_BUFFER_MAX_QUERIES 9 /* HUD_NUM_QUERIES + 1 */ + static INLINE struct nv50_screen * nv50_screen(struct pipe_screen *screen) { -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 7/8] nv50: expose global performance counters to the HUD
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 41 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 3 ++ 3 files changed, 45 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index b9d2914..062d427 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -1535,6 +1535,47 @@ nv50_hw_pm_query_result(struct nv50_context *nv50, struct nv50_query *q, return TRUE; } +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + // TODO: Check DRM version when nvif will be merged in libdrm! + if (screen->base.perfmon) { + nv50_identify_events(screen); + count += NV50_HW_PM_QUERY_COUNT; + } + + if (!info) + return count; + + /* Init default values. */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->max_value.u64 = 0; + info->group_id = -1; + + if (id < count) { + if (screen->base.perfmon) { + const struct nv50_hw_pm_query_cfg *cfg = +nv50_hw_pm_query_get_cfg(screen, NV50_HW_PM_QUERY(id)); + + info->name = cfg->event->name; + info->query_type = NV50_HW_PM_QUERY(id); + info->max_value.u64 = +(cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0; + return 1; + } + } + + /* User asked for info about non-existing query. 
*/ + return 0; +} + void nv50_init_query_functions(struct nv50_context *nv50) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 53817c0..f07798e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -745,6 +745,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 0449659..69127c0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -143,6 +143,9 @@ nv50_screen(struct pipe_screen *screen) #define NV50_HW_PM_QUERY_TEX_CACHE_HIT 22 #define NV50_HW_PM_QUERY_TEX_WAITS_FOR_FB 23 +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); + boolean nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters
This commit adds support for both compute and graphics global performance counters which have been reverse engineered with CUPTI (Linux) and PerfKit (Windows). Currently, only one query type can be monitored at the same time because the Gallium's HUD doesn't fit pretty well. This will be improved later. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 1057 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 35 + 2 files changed, 1087 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 1162110..b9d2914 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv_object.xml.h" +#include "nouveau_perfmon.h" + #define NV50_QUERY_STATE_READY 0 #define NV50_QUERY_STATE_ACTIVE 1 #define NV50_QUERY_STATE_ENDED 2 @@ -51,10 +53,25 @@ struct nv50_query { boolean is64bit; struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; + struct nouveau_object *perfdom; }; #define NV50_QUERY_ALLOC_SPACE 256 +#ifdef DEBUG +static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args); +#endif + +static boolean +nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *); +static void +nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *); +static boolean +nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *); +static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *); +static boolean nv50_hw_pm_query_result(struct nv50_context *, +struct nv50_query *, boolean, void *); + static INLINE struct nv50_query * nv50_query(struct pipe_query *pipe) { @@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { + struct nv50_context *nv50 = nv50_context(pipe); + struct 
nv50_query *q = nv50_query(pq); + if (!pq) return; - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) + nv50_hw_pm_query_destroy(nv50, q); + + nv50_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &q->fence); + FREE(q); } static struct pipe_query * @@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ } + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + if (!nv50_hw_pm_query_create(nv50, q)) + return NULL; + } + return (struct pipe_query *)q; } @@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + boolean ret = TRUE; if (!pq) return FALSE; @@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nv50_query_get(push, q, 0x10, 0x5002); break; default: + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + ret = nv50_hw_pm_query_begin(nv50, q); + } break; } q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return ret; } static void @@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) q->state = NV50_QUERY_STATE_READY; break; default: - assert(0); + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + nv50_hw_pm_query_end(nv50, q); + } break; } @@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!pq) return FALSE; + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + return nv50_hw_pm_query_result(nv50, q, wait, result); + } + if (q->state != NV50_QUERY_STATE_READY) nv50_query_update(q); @@ -488,6 +526,1015 @@ 
nva0_so_target_save_offset(struct pipe_context *pipe, nv50_query_end(pipe, targ->pq); } +/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */ + +struct nv50_hw_pm_source_cfg +{ + const char *name; + uint64_t value; +}; + +struct nv50_hw_pm_signal_cfg +{ + const char *name; + const struct nv50_hw_pm_source_cfg src[8]; +}; + +struct nv50_hw_pm_counter_cfg +{ + uint16_t logic_op; + const struct nv50_hw_pm_signal_cfg sig[4]; +}; +
[Mesa-dev] [RFC PATCH 0/8] nv50: expose global performance counters
Hello there, This series exposes NVIDIA's global performance counters for Tesla through the Gallium's HUD and the GL_AMD_performance_monitor extension. This adds support for 24 hardware events which have been reverse engineered with PerfKit (Windows) and CUPTI (Linux). These hardware events will allow developers to profile OpenGL applications. To reduce latency and to improve accuracy, these global performance counters are tied to the command stream of the GPU using a set of software methods instead of ioctls. Results are then written by the kernel to a mapped notifier buffer object that allows the userspace to read them back. However, the libdrm branch which implements the new nvif interface exposed by Nouveau and the software methods interfaces are not upstream yet. I hope this will be done in the next few days. The code of this series can be found here: http://cgit.freedesktop.org/~hakzsam/mesa/log/?h=nouveau_perfmon The libdrm branch can be found here: http://cgit.freedesktop.org/~hakzsam/drm/log/?h=nouveau_perfmon The code of the software methods interface can be found here (last two commits): http://cgit.freedesktop.org/~hakzsam/nouveau/log/?h=nouveau_perfmon Another series which exposes global performance counters for Fermi and Kepler will be submitted once I have got enough reviews for this one. Feel free to review it. Thanks, Samuel. 
Samuel Pitoiset (8): nouveau: implement the nvif hardware performance counters interface nv50: allocate a software object class nv50: allocate and map a notifier buffer object for PM nv50: configure the ring buffer for reading back PM counters nv50: prevent NULL pointer dereference with pipe_query functions nv50: add support for compute/graphics global performance counters nv50: expose global performance counters to the HUD nv50: enable GL_AMD_performance_monitor src/gallium/drivers/nouveau/Makefile.sources |2 + src/gallium/drivers/nouveau/nouveau_perfmon.c | 302 +++ src/gallium/drivers/nouveau/nouveau_perfmon.h | 59 ++ src/gallium/drivers/nouveau/nouveau_screen.c |5 + src/gallium/drivers/nouveau/nouveau_screen.h |1 + src/gallium/drivers/nouveau/nv50/nv50_query.c | 1148 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 49 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 51 ++ src/gallium/drivers/nouveau/nv50/nv50_winsys.h |1 + 9 files changed, 1612 insertions(+), 6 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 4/8] nv50: configure the ring buffer for reading back PM counters
To write data at the right offset, the kernel has to know some parameters of this ring buffer, like the number of domains and the maximum number of queries. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 3a99cc8..53817c0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -441,6 +441,13 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->sw->handle); + BEGIN_NV04(push, SUBC_SW(0x0190), 1); + PUSH_DATA (push, screen->query->handle); + // XXX: Maybe add a check for DRM version here ? + BEGIN_NV04(push, SUBC_SW(0x0600), 1); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES); + BEGIN_NV04(push, SUBC_SW(0x0604), 1); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS); BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [RFC PATCH 1/8] nouveau: implement the nvif hardware performance counters interface
This commit implements the base interface for hardware performance counters that will be shared between nv50 and nvc0 drivers. TODO: Bump libdrm version of mesa when nvif will be merged. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nouveau_perfmon.c | 302 ++ src/gallium/drivers/nouveau/nouveau_perfmon.h | 59 + src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 1 + 5 files changed, 369 insertions(+) create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 3fae3bc..3da0bdc 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -10,6 +10,8 @@ C_SOURCES := \ nouveau_heap.h \ nouveau_mm.c \ nouveau_mm.h \ + nouveau_perfmon.c \ + nouveau_perfmon.h \ nouveau_screen.c \ nouveau_screen.h \ nouveau_statebuf.h \ diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c b/src/gallium/drivers/nouveau/nouveau_perfmon.c new file mode 100644 index 000..3798612 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c @@ -0,0 +1,302 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "util/u_memory.h" + +#include "nouveau_debug.h" +#include "nouveau_winsys.h" +#include "nouveau_perfmon.h" + +static int +nouveau_perfmon_query_sources(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom, + struct nouveau_perfmon_sig *sig) +{ + struct nvif_perfmon_query_source_v0 args = {}; + + args.domain = dom->id; + args.signal = sig->signal; + do { + uint8_t prev_iter = args.iter; + struct nouveau_perfmon_src *src; + int ret; + + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SOURCE, + &args, sizeof(args)); + if (ret) + return ret; + + if (prev_iter) { + args.iter = prev_iter; + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SOURCE, + &args, sizeof(args)); + if (ret) + return ret; + + src = CALLOC_STRUCT(nouveau_perfmon_src); + if (!src) + return -ENOMEM; + +#if 0 + debug_printf("id = %d\n", args.source); + debug_printf("name = %s\n", args.name); + debug_printf("mask = %08x\n", args.mask); + debug_printf("\n"); +#endif + + src->id = args.source; + strncpy(src->name, args.name, sizeof(src->name)); + list_addtail(&src->head, &sig->sources); + } + } while (args.iter != 0xff); + + return 0; +} + +static int +nouveau_perfmon_query_signals(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom) +{ + struct nvif_perfmon_query_signal_v0 args = {}; + + args.domain = dom->id; + do { + uint16_t prev_iter = args.iter; + struct nouveau_perfmon_sig *sig; + int ret; + + ret = nouveau_object_mthd(pm->object, 
NVIF_PERFMON_V0_QUERY_SIGNAL, +&args, sizeof(args)); + if (ret) +
Re: [Mesa-dev] [Nouveau] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions
On 06/22/2015 10:52 PM, Ilia Mirkin wrote: If query_create fails, why would any of these functions get called? Because the HUD doesn't check if query_create() fails and it calls other pipe_query functions with NULL pointer instead of a valid query object. On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset wrote: This may happen when nv50_query_create() fails to create a new query. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 55fcac8..1162110 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -96,6 +96,9 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { + if (!pq) + return; + nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); nouveau_fence_ref(NULL, &nv50_query(pq)->fence); FREE(nv50_query(pq)); @@ -152,6 +155,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + if (!pq) + return FALSE; + /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- * initialized it to TRUE. 
@@ -218,6 +224,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + if (!pq) + return; + q->state = NV50_QUERY_STATE_ENDED; switch (q->type) { @@ -294,9 +303,12 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, uint64_t *res64 = (uint64_t *)result; uint32_t *res32 = (uint32_t *)result; boolean *res8 = (boolean *)result; - uint64_t *data64 = (uint64_t *)q->data; + uint64_t *data64; int i; + if (!pq) + return FALSE; + if (q->state != NV50_QUERY_STATE_READY) nv50_query_update(q); @@ -314,6 +326,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, } q->state = NV50_QUERY_STATE_READY; + data64 = (uint64_t *)q->data; switch (q->type) { case PIPE_QUERY_GPU_FINISHED: res8[0] = TRUE; -- 2.4.4 ___ Nouveau mailing list nouv...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [RFC PATCH 5/8] nv50: prevent NULL pointer dereference with pipe_query functions
On 06/23/2015 08:57 AM, Michel Dänzer wrote: On 23.06.2015 06:02, Samuel Pitoiset wrote: On 06/22/2015 10:52 PM, Ilia Mirkin wrote: If query_create fails, why would any of these functions get called? Because the HUD doesn't check if query_create() fails and it calls other pipe_query functions with a NULL pointer instead of a valid query object. Could the HUD code be fixed instead? It's definitely possible, and probably a better solution than guarding against NULL pointer dereferences in the underlying drivers. I'll make a patch. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] gallium/hud: prevent NULL pointer dereference with pipe_query functions
The HUD doesn't check if query_create() fails and it calls other pipe_query functions with NULL pointer instead of a valid query object. Signed-off-by: Samuel Pitoiset --- src/gallium/auxiliary/hud/hud_driver_query.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c index 603aba7..ee71678 100644 --- a/src/gallium/auxiliary/hud/hud_driver_query.c +++ b/src/gallium/auxiliary/hud/hud_driver_query.c @@ -62,7 +62,8 @@ query_new_value(struct hud_graph *gr) uint64_t now = os_time_get(); if (info->last_time) { - pipe->end_query(pipe, info->query[info->head]); + if (info->query[info->head]) + pipe->end_query(pipe, info->query[info->head]); /* read query results */ while (1) { @@ -70,7 +71,7 @@ query_new_value(struct hud_graph *gr) union pipe_query_result result; uint64_t *res64 = (uint64_t *)&result; - if (pipe->get_query_result(pipe, query, FALSE, &result)) { + if (query && pipe->get_query_result(pipe, query, FALSE, &result)) { info->results_cumulative += res64[info->result_index]; info->num_results++; @@ -88,7 +89,8 @@ query_new_value(struct hud_graph *gr) "gallium_hud: all queries are busy after %i frames, " "can't add another query\n", NUM_QUERIES); - pipe->destroy_query(pipe, info->query[info->head]); + if (info->query[info->head]) + pipe->destroy_query(pipe, info->query[info->head]); info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); } @@ -113,15 +115,15 @@ query_new_value(struct hud_graph *gr) info->results_cumulative = 0; info->num_results = 0; } - - pipe->begin_query(pipe, info->query[info->head]); } else { /* initialize */ info->last_time = now; info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); - pipe->begin_query(pipe, info->query[info->head]); } + + if (info->query[info->head]) + pipe->begin_query(pipe, info->query[info->head]); } static void -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/hud: prevent NULL pointer dereference with pipe_query functions
On 06/25/2015 02:36 PM, Marek Olšák wrote: What's the point of drawing a HUD pane if a query cannot be created? With my series which adds support for global performance counters on NV50, query_create() may fail if we want to monitor *two* different query types with the HUD. This limitation is due to how the HUD uses the pipe_query interface, which doesn't fit well with the underlying interface exposed by Nouveau. In other words, with two different query types the scenario is as follows: CREATE Q1, BEGIN Q1, CREATE Q2, BEGIN Q2, END Q1, RESULT Q1, BEGIN Q1, END Q2, RESULT Q2, BEGIN Q2, END Q1, and so on. But, with nv50/nvc0 drivers I need to schedule hardware counters at query creation and this is going to be pretty hard without a really weird workaround. Hence, only one query type can be monitored at a time, and query_create() fails. A better scenario for nouveau drivers would be: CREATE Q1, CREATE Q2, BEGIN Q1, BEGIN Q2, END Q1, END Q2, RESULT Q1, RESULT Q2, BEGIN Q1, and so on. This would allow introducing, for example, begin_all_queries() and end_all_queries() to be able to create/begin/end all queries in one shot *only*. My plan is to change this behaviour but it will require a lot of changes in the HUD, mainly because queries are collected by pane. Can we detect this during initialization? I'm not sure if we can detect this at initialization and if this is going to be easy to do. But, how can we handle the case where a driver will only fail once to create a query? Do we need to remove the pane? Not sure. This is going to be hard to say, especially because nouveau drivers could fail if no hardware counters are available. Marek On Wed, Jun 24, 2015 at 9:26 PM, Samuel Pitoiset wrote: The HUD doesn't check if query_create() fails and it calls other pipe_query functions with a NULL pointer instead of a valid query object. 
Signed-off-by: Samuel Pitoiset --- src/gallium/auxiliary/hud/hud_driver_query.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c index 603aba7..ee71678 100644 --- a/src/gallium/auxiliary/hud/hud_driver_query.c +++ b/src/gallium/auxiliary/hud/hud_driver_query.c @@ -62,7 +62,8 @@ query_new_value(struct hud_graph *gr) uint64_t now = os_time_get(); if (info->last_time) { - pipe->end_query(pipe, info->query[info->head]); + if (info->query[info->head]) + pipe->end_query(pipe, info->query[info->head]); /* read query results */ while (1) { @@ -70,7 +71,7 @@ query_new_value(struct hud_graph *gr) union pipe_query_result result; uint64_t *res64 = (uint64_t *)&result; - if (pipe->get_query_result(pipe, query, FALSE, &result)) { + if (query && pipe->get_query_result(pipe, query, FALSE, &result)) { info->results_cumulative += res64[info->result_index]; info->num_results++; @@ -88,7 +89,8 @@ query_new_value(struct hud_graph *gr) "gallium_hud: all queries are busy after %i frames, " "can't add another query\n", NUM_QUERIES); - pipe->destroy_query(pipe, info->query[info->head]); + if (info->query[info->head]) + pipe->destroy_query(pipe, info->query[info->head]); info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); } @@ -113,15 +115,15 @@ query_new_value(struct hud_graph *gr) info->results_cumulative = 0; info->num_results = 0; } - - pipe->begin_query(pipe, info->query[info->head]); } else { /* initialize */ info->last_time = now; info->query[info->head] = pipe->create_query(pipe, info->query_type, 0); - pipe->begin_query(pipe, info->query[info->head]); } + + if (info->query[info->head]) + pipe->begin_query(pipe, info->query[info->head]); } static void -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [RFC PATCH 3/8] nv50: allocate and map a notifier buffer object for PM
On 06/26/2015 01:02 AM, Ilia Mirkin wrote: On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset wrote: This notifier buffer object will be used to read back global performance counters results written by the kernel. For each domain, we will store the handle of the perfdom object, an array of 4 counters and the number of cycles. Like the Gallium's HUD, we keep a list of busy queries in a ring in order to prevent stalls when reading queries. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 29 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 ++ 2 files changed, 35 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index c985344..3a99cc8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); nouveau_object_del(&screen->sw); + nouveau_object_del(&screen->query); nouveau_screen_fini(&screen->base); @@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev) struct nv50_screen *screen; struct pipe_screen *pscreen; struct nouveau_object *chan; + struct nv04_fifo *fifo; uint64_t value; uint32_t tesla_class; unsigned stack_size; + uint32_t length; int ret; screen = CALLOC_STRUCT(nv50_screen); @@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev) screen->base.pushbuf->rsvd_kick = 5; chan = screen->base.channel; + fifo = chan->data; pscreen->destroy = nv50_screen_destroy; pscreen->context_create = nv50_create; @@ -772,6 +776,23 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + /* Compute size (in bytes) of the notifier buffer object which is used +* in order to read back global performance counters results written +* by the kernel. For each domain, we store the handle of the perfdom +* object, an array of 4 counters and the number of cycles. 
Like for +* the Gallium's HUD, we keep a list of busy queries in a ring in order +* to prevent stalls when reading queries. */ + length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) * + NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4; This calculation may become apparent to me later, but it certainly isn't now. What's the *6? You refer to an array of 4 counters... should that have been 6 counters? Or should this have been a 4? This refers to the handle of the object, the array of 4 counters and the number of cycles. In other words, for each domain we store: id, ctr0, ctr1, ctr2, ctr3, clk. + + ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS, +&(struct nv04_notify){ .length = length }, +sizeof(struct nv04_notify), &screen->query); + if (ret) { + NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret); + goto fail; + } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, NULL, 0, &screen->sw); if (ret) { @@ -845,6 +866,14 @@ nv50_screen_create(struct nouveau_device *dev) nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify_bo); + if (ret == 0) + nouveau_bo_map(screen->notify_bo, 0, screen->base.client); ret = ... Good catch, thanks. 
+ if (ret) { + NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret); + goto fail; + } + nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); screen->TPs = util_bitcount(value & 0x); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69fdfdb..71a5247 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -59,6 +59,7 @@ struct nv50_screen { struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *stack_bo; struct nouveau_bo *tls_bo; + struct nouveau_bo *notify_bo; unsigned TPs; unsigned MPsInTP; @@ -89,6 +90,7 @@ struct nv50_screen { } fence; struct nouveau_object *sync; + struct nouveau_object *query; struct nouveau_object *tesla; struct nouveau_object *eng2d; @@ -96,6 +98,10 @@ struct nv50_screen { struct nouveau_object *sw; }; +/* Parameters of the ring buffer used to read back global PM counters. */ +#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS
Re: [Mesa-dev] [Nouveau] [RFC PATCH 4/8] nv50: configure the ring buffer for reading back PM counters
On 06/26/2015 01:04 AM, Ilia Mirkin wrote: Yeah, this whole thing has to be guarded by a drm version check, otherwise it'll end up with errors in dmesg I assume. Perhaps only allocate screen->query when the drm version matches, and gate things on that for the rest of the code? Yes, this sounds good to me. On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset wrote: To write data at the right offset, the kernel has to know some parameters of this ring buffer, like the number of domains and the maximum number of queries. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 3a99cc8..53817c0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -441,6 +441,13 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->sw->handle); + BEGIN_NV04(push, SUBC_SW(0x0190), 1); + PUSH_DATA (push, screen->query->handle); + // XXX: Maybe add a check for DRM version here ? + BEGIN_NV04(push, SUBC_SW(0x0600), 1); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES); + BEGIN_NV04(push, SUBC_SW(0x0604), 1); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS); FYI you can do BEGIN_NV04(..., 2), since they're sequential. I'm going to make the change. BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); -- 2.4.4 ___ Nouveau mailing list nouv...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/nouveau ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [RFC PATCH 6/8] nv50: add support for compute/graphics global performance counters
On 06/26/2015 01:09 AM, Ilia Mirkin wrote: What's with the \%'s everywhere? Maybe "percent" will be better ? On Mon, Jun 22, 2015 at 4:53 PM, Samuel Pitoiset wrote: This commit adds support for both compute and graphics global performance counters which have been reverse engineered with CUPTI (Linux) and PerfKit (Windows). Currently, only one query type can be monitored at the same time because the Gallium's HUD doesn't fit pretty well. This will be improved later. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 1057 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 35 + 2 files changed, 1087 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 1162110..b9d2914 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv_object.xml.h" +#include "nouveau_perfmon.h" + #define NV50_QUERY_STATE_READY 0 #define NV50_QUERY_STATE_ACTIVE 1 #define NV50_QUERY_STATE_ENDED 2 @@ -51,10 +53,25 @@ struct nv50_query { boolean is64bit; struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; + struct nouveau_object *perfdom; }; #define NV50_QUERY_ALLOC_SPACE 256 +#ifdef DEBUG +static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args); +#endif + +static boolean +nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *); +static void +nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *); +static boolean +nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *); +static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *); +static boolean nv50_hw_pm_query_result(struct nv50_context *, +struct nv50_query *, boolean, void *); + static INLINE struct nv50_query * nv50_query(struct pipe_query *pipe) { @@ -96,12 +113,18 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query 
*q, int size) static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + if (!pq) return; - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) + nv50_hw_pm_query_destroy(nv50, q); + + nv50_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &q->fence); + FREE(q); } static struct pipe_query * @@ -130,6 +153,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ } + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + if (!nv50_hw_pm_query_create(nv50, q)) + return NULL; + } + return (struct pipe_query *)q; } @@ -154,6 +182,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + boolean ret = TRUE; if (!pq) return FALSE; @@ -211,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nv50_query_get(push, q, 0x10, 0x5002); break; default: + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + ret = nv50_hw_pm_query_begin(nv50, q); + } break; } q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return ret; } static void @@ -274,7 +306,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) q->state = NV50_QUERY_STATE_READY; break; default: - assert(0); + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + nv50_hw_pm_query_end(nv50, q); + } break; } @@ -309,6 +343,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!pq) return FALSE; + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= 
NV50_HW_PM_QUERY_LAST)) { + return nv50_hw_pm_query_result(nv50, q, wait, result); + } + if (q->state != NV50_QUERY_STATE_READY) nv50_query_update(q); @@ -488,6 +526,1015 @@ nva0_so_target_save_offset(struct pipe_context *pipe, nv50_query_end(pipe, targ->pq); } +/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */ + +struct nv50_hw_pm_source_cfg +{ + const char *name; + uint64_t
[Mesa-dev] [PATCH v2 0/7] nv50: expose global performance counters
Hello there, This series exposes NVIDIA's global performance counters for Tesla through the Gallium's HUD and the GL_AMD_performance_monitor extension. This adds support for 24 hardware events which have been reverse engineered with PerfKit (Windows) and CUPTI (Linux). These hardware events will allow developers to profile OpenGL applications. To reduce latency and to improve accuracy, these global performance counters are tied to the command stream of the GPU using a set of software methods instead of ioctls. Results are then written by the kernel to a mapped notifier buffer object that allows userspace to read them back. However, the libdrm branch which implements the new nvif interface exposed by Nouveau and the software methods interface are not upstream yet. I hope this will be done in the next few days. The code of this series can be found here: http://cgit.freedesktop.org/~hakzsam/mesa/log/?h=nouveau_perfmon The libdrm branch can be found here: http://cgit.freedesktop.org/~hakzsam/drm/log/?h=nouveau_perfmon The code of the software methods interface can be found here (last two commits): http://cgit.freedesktop.org/~hakzsam/nouveau/log/?h=nouveau_perfmon Another series which exposes global performance counters for Fermi and Kepler will be submitted once I have received enough reviews for this one. Feel free to review. Thanks, Samuel. 
Samuel Pitoiset (7): nouveau: implement the nvif hardware performance counters interface nv50: allocate a software object class nv50: allocate and map a notifier buffer object for PM nv50: configure the ring buffer for reading back PM counters nv50: add support for compute/graphics global performance counters nv50: expose global performance counters to the HUD nv50: enable GL_AMD_performance_monitor src/gallium/drivers/nouveau/Makefile.sources |2 + src/gallium/drivers/nouveau/nouveau_perfmon.c | 290 ++ src/gallium/drivers/nouveau/nouveau_perfmon.h | 58 ++ src/gallium/drivers/nouveau/nouveau_screen.c |5 + src/gallium/drivers/nouveau/nouveau_screen.h |1 + src/gallium/drivers/nouveau/nv50/nv50_query.c | 1142 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 55 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 51 ++ src/gallium/drivers/nouveau/nv50/nv50_winsys.h |1 + 9 files changed, 1600 insertions(+), 5 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/7] nv50: allocate a software object class
This will allow to monitor global performance counters through the command stream of the GPU instead of using ioctls. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 1 + 3 files changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6583a35..c985344 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); + nouveau_object_del(&screen->sw); nouveau_screen_fini(&screen->base); @@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->tesla->handle); + BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->sw->handle); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); @@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, +NULL, 0, &screen->sw); + if (ret) { + NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret); + goto fail; + } + ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, NULL, 0, &screen->m2mf); if (ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 881051b..69fdfdb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -93,6 +93,7 @@ struct nv50_screen { struct nouveau_object *tesla; struct nouveau_object *eng2d; struct nouveau_object *m2mf; + struct nouveau_object *sw; }; static INLINE struct nv50_screen * diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index e8578c8..5cb33ef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_COMPUTE(m) 6, (m) #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) +#define SUBC_SW(m) 7, (m) static INLINE uint32_t NV50_FIFO_PKHDR(int subc, int mthd, unsigned size) -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 4/7] nv50: configure the ring buffer for reading back PM counters
To write data at the right offset, the kernel has to know some parameters of this ring buffer, like the number of domains and the maximum number of queries. Changes since v2: - only configure the ring buffer if the notifier BO is allocated - only use one BEGIN_NV04() Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ab95d65..335bff1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -442,6 +442,16 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->sw->handle); + if (screen->query) { + /* Do not need to configure the ring buffer used to read back + * global performance counters when it is not allocated. */ + BEGIN_NV04(push, SUBC_SW(0x0190), 1); + PUSH_DATA (push, screen->query->handle); + BEGIN_NV04(push, SUBC_SW(0x0600), 2); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS); + } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 6/7] nv50: expose global performance counters to the HUD
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 41 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 3 ++ 3 files changed, 45 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 7fb6f3a..7dadb77 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -1532,6 +1532,47 @@ nv50_hw_pm_query_result(struct nv50_context *nv50, struct nv50_query *q, return TRUE; } +int +nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, + unsigned id, + struct pipe_driver_query_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + // TODO: Check DRM version when nvif will be merged in libdrm! + if (screen->base.perfmon) { + nv50_identify_events(screen); + count += NV50_HW_PM_QUERY_COUNT; + } + + if (!info) + return count; + + /* Init default values. */ + info->name = "this_is_not_the_query_you_are_looking_for"; + info->query_type = 0xdeadd01d; + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64; + info->max_value.u64 = 0; + info->group_id = -1; + + if (id < count) { + if (screen->base.perfmon) { + const struct nv50_hw_pm_query_cfg *cfg = +nv50_hw_pm_query_get_cfg(screen, NV50_HW_PM_QUERY(id)); + + info->name = cfg->event->name; + info->query_type = NV50_HW_PM_QUERY(id); + info->max_value.u64 = +(cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0; + return 1; + } + } + + /* User asked for info about non-existing query. 
*/ + return 0; +} + void nv50_init_query_functions(struct nv50_context *nv50) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 335bff1..ac1acd1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -748,6 +748,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_param = nv50_screen_get_param; pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; + pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 0449659..69127c0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -143,6 +143,9 @@ nv50_screen(struct pipe_screen *screen) #define NV50_HW_PM_QUERY_TEX_CACHE_HIT 22 #define NV50_HW_PM_QUERY_TEX_WAITS_FOR_FB 23 +int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, + struct pipe_driver_query_info *); + boolean nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/7] nv50: allocate and map a notifier buffer object for PM
This notifier buffer object will be used to read back global performance counters results written by the kernel. For each domain, we will store the handle of the perfdom object, an array of 4 counters and the number of cycles. Like the Gallium's HUD, we keep a list of busy queries in a ring in order to prevent stalls when reading queries. Changes since v2: - check return value of nouveau_bo_map() - add a libdrm version check around creating the notifier BO Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 32 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 + 2 files changed, 38 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index c985344..ab95d65 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -368,6 +368,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); nouveau_object_del(&screen->sw); + nouveau_object_del(&screen->query); nouveau_screen_fini(&screen->base); @@ -699,9 +700,11 @@ nv50_screen_create(struct nouveau_device *dev) struct nv50_screen *screen; struct pipe_screen *pscreen; struct nouveau_object *chan; + struct nv04_fifo *fifo; uint64_t value; uint32_t tesla_class; unsigned stack_size; + uint32_t length; int ret; screen = CALLOC_STRUCT(nv50_screen); @@ -727,6 +730,7 @@ nv50_screen_create(struct nouveau_device *dev) screen->base.pushbuf->rsvd_kick = 5; chan = screen->base.channel; + fifo = chan->data; pscreen->destroy = nv50_screen_destroy; pscreen->context_create = nv50_create; @@ -772,6 +776,26 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + // TODO: Update libdrm version when nvif will be merged! 
+ if (dev->drm_version >= 0x01000101) { + /* Compute size (in bytes) of the notifier buffer object which is used + * in order to read back global performance counters results written + * by the kernel. For each domain, we store the handle of the perfdom + * object, an array of 4 counters and the number of cycles. Like for + * the Gallium's HUD, we keep a list of busy queries in a ring in order + * to prevent stalls when reading queries. */ + length = (1 + (NV50_HW_PM_RING_BUFFER_NUM_DOMAINS * 6) * + NV50_HW_PM_RING_BUFFER_MAX_QUERIES) * 4; + + ret = nouveau_object_new(chan, 0xbeef0302, NOUVEAU_NOTIFIER_CLASS, + &(struct nv04_notify){ .length = length }, + sizeof(struct nv04_notify), &screen->query); + if (ret) { + NOUVEAU_ERR("Failed to allocate notifier object for PM: %d\n", ret); + goto fail; + } + } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, NULL, 0, &screen->sw); if (ret) { @@ -845,6 +869,14 @@ nv50_screen_create(struct nouveau_device *dev) nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2); + ret = nouveau_bo_wrap(screen->base.device, fifo->notify, &screen->notify_bo); + if (ret == 0) + ret = nouveau_bo_map(screen->notify_bo, 0, screen->base.client); + if (ret) { + NOUVEAU_ERR("Failed to map notifier object for PM: %d\n", ret); + goto fail; + } + nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); screen->TPs = util_bitcount(value & 0x); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69fdfdb..71a5247 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -59,6 +59,7 @@ struct nv50_screen { struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *stack_bo; struct nouveau_bo *tls_bo; + struct nouveau_bo *notify_bo; unsigned TPs; unsigned MPsInTP; @@ -89,6 +90,7 @@ struct nv50_screen { } fence; struct 
nouveau_object *sync; + struct nouveau_object *query; struct nouveau_object *tesla; struct nouveau_object *eng2d; @@ -96,6 +98,10 @@ struct nv50_screen { struct nouveau_object *sw; }; +/* Parameters of the ring buffer used to read back global PM counters. */ +#define NV50_HW_PM_RING_BUFFER_NUM_DOMAINS 8 +#define NV50_HW_PM_RING_BUFFER_MAX_QUERIES 9 /* HUD_NUM_QUERIES + 1 */ + static INLINE struct nv50_screen * nv50_screen(struct pipe_screen *screen) { -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/7] nouveau: implement the nvif hardware performance counters interface
This commit implements the base interface for hardware performance counters that will be shared between nv50 and nvc0 drivers. TODO: Bump libdrm version of mesa when nvif will be merged. Changes since v2: - remove double-query thing for domains, signals and sources Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nouveau_perfmon.c | 290 ++ src/gallium/drivers/nouveau/nouveau_perfmon.h | 58 ++ src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 1 + 5 files changed, 356 insertions(+) create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 3fae3bc..3da0bdc 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -10,6 +10,8 @@ C_SOURCES := \ nouveau_heap.h \ nouveau_mm.c \ nouveau_mm.h \ + nouveau_perfmon.c \ + nouveau_perfmon.h \ nouveau_screen.c \ nouveau_screen.h \ nouveau_statebuf.h \ diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c b/src/gallium/drivers/nouveau/nouveau_perfmon.c new file mode 100644 index 000..e1d4546 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c @@ -0,0 +1,290 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "util/u_memory.h" + +#include "nouveau_debug.h" +#include "nouveau_winsys.h" +#include "nouveau_perfmon.h" + +static int +nouveau_perfmon_query_sources(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom, + struct nouveau_perfmon_sig *sig) +{ + struct nvif_perfmon_query_source_v0 args = {}; + + args.iter = 1; + args.domain = dom->id; + args.signal = sig->signal; + do { + struct nouveau_perfmon_src *src; + int ret; + + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SOURCE, +&args, sizeof(args)); + if (ret) + return ret; + + src = CALLOC_STRUCT(nouveau_perfmon_src); + if (!src) + return -ENOMEM; + +#if 0 + debug_printf("id = %d\n", args.source); + debug_printf("name = %s\n", args.name); + debug_printf("mask = %08x\n", args.mask); + debug_printf("\n"); +#endif + + src->id = args.source; + strncpy(src->name, args.name, sizeof(src->name)); + list_addtail(&src->head, &sig->sources); + } while (args.iter != 0xff); + + return 0; +} + +static int +nouveau_perfmon_query_signals(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom) +{ + struct nvif_perfmon_query_signal_v0 args = {}; + + args.iter = 1; + args.domain = dom->id; + do { + struct nouveau_perfmon_sig *sig; + int ret; + + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SIGNAL, +&args, sizeof(args)); + if (ret) + return ret; + + sig = CALLOC_STRUCT(nouveau_perfmon_sig); + if (!sig) + return -ENOMEM; + list_inithead(&sig->sources); + 
+#if 0 + debug_printf("name = %s\n", args.name); + debug_printf("signal= 0x%02x\n", args.signal); + debug_printf("source_nr = %d\n", args.source_nr); + debug_printf("\n"); +#endif + + sig->signal = args.signal; + strncpy(sig->name, args.name, sizeof(sig->name)); + list_addtail(&sig->head, &dom->signals); + + /* Q
[Mesa-dev] [PATCH v2 7/7] nv50: enable GL_AMD_performance_monitor
This exposes a group of global performance counters that enables GL_AMD_performance_monitor. All piglit tests are okay. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 35 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 + 3 files changed, 42 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 7dadb77..6d57305 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -1563,6 +1563,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, info->name = cfg->event->name; info->query_type = NV50_HW_PM_QUERY(id); + info->group_id = NV50_HW_PM_QUERY_GROUP; info->max_value.u64 = (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0; return 1; @@ -1573,6 +1574,40 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, return 0; } +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, +unsigned id, +struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + // TODO: Check DRM version when nvif will be merged in libdrm! + if (screen->base.perfmon) { + count++; /* NV50_HW_PM_QUERY_GROUP */ + } + + if (!info) + return count; + + if (id == NV50_HW_PM_QUERY_GROUP) { + if (screen->base.perfmon) { + info->name = "Global performance counters"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + info->num_queries = NV50_HW_PM_QUERY_COUNT; + info->max_active_queries = 1; /* TODO: get rid of this limitation! 
*/ + return 1; + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + info->type = 0; + return 0; +} + void nv50_init_query_functions(struct nv50_context *nv50) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ac1acd1..05f921d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -749,6 +749,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69127c0..807ae0e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +/* Hardware global performance counters groups. */ +#define NV50_HW_PM_QUERY_GROUP 0 + /* Hardware global performance counters. */ #define NV50_HW_PM_QUERY_COUNT 24 #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i)) @@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen) int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, +struct pipe_driver_query_group_info *); + boolean nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); -- 2.4.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 5/7] nv50: add support for compute/graphics global performance counters
This commit adds support for both compute and graphics global performance counters which have been reverse engineered with CUPTI (Linux) and PerfKit (Windows). Currently, only one query type can be monitored at the same time because the Gallium's HUD doesn't fit pretty well. This will be improved later. Changes since v2: - replace \% by percentage - remove one extra call to PUSH_SPACE - use nouveau_fence instead of my hand-made fence mechanism Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 1066 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 35 + 2 files changed, 1096 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 81f7474..7fb6f3a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv_object.xml.h" +#include "nouveau_perfmon.h" + #define NV50_QUERY_STATE_READY 0 #define NV50_QUERY_STATE_ACTIVE 1 #define NV50_QUERY_STATE_ENDED 2 @@ -51,10 +53,25 @@ struct nv50_query { boolean is64bit; struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; + struct nouveau_object *perfdom; }; #define NV50_QUERY_ALLOC_SPACE 256 +#ifdef DEBUG +static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args); +#endif + +static boolean +nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *); +static void +nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *); +static boolean +nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *); +static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *); +static boolean nv50_hw_pm_query_result(struct nv50_context *, +struct nv50_query *, boolean, void *); + static INLINE struct nv50_query * nv50_query(struct pipe_query *pipe) { @@ -96,9 +113,15 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) static 
void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) + nv50_hw_pm_query_destroy(nv50, q); + + nv50_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &q->fence); + FREE(q); } static struct pipe_query * @@ -120,6 +143,12 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) type == PIPE_QUERY_PRIMITIVES_EMITTED || type == PIPE_QUERY_SO_STATISTICS || type == PIPE_QUERY_PIPELINE_STATISTICS); + if (type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST) { + /* Hardware global performance counters are not 64 bits, but we also use + * a fence to make sure the query is ready. */ + q->is64bit = TRUE; + } + q->type = type; if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -127,6 +156,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! 
*/ } + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + if (!nv50_hw_pm_query_create(nv50, q)) + return NULL; + } + return (struct pipe_query *)q; } @@ -151,6 +185,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + boolean ret = TRUE; /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- @@ -205,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nv50_query_get(push, q, 0x10, 0x5002); break; default: + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + ret = nv50_hw_pm_query_begin(nv50, q); + } break; } q->state = NV50_QUERY_STATE_ACTIVE; - return true; + return ret; } static void @@ -265,7 +303,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) q->state = NV50_QUERY_STATE_READY; break; default: - assert(0); + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + nv50_hw_pm_query_end(nv50, q); + } break; } @@ -30
[Mesa-dev] [PATCH] nvc0: fix wrong use of BLIT_SRC_Y_INT for 2D texture copy
According to nv50, this should be src->ms_y instead of src->ms_x. This code has been here since 2012, so it's probably a typo which has gone undetected for a long time. I didn't do a full piglit run to check whether it fixes any other weird issues. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index a820de7..53cd8cd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -189,7 +189,7 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push, PUSH_DATA (push, 0); PUSH_DATA (push, sx << src->ms_x); PUSH_DATA (push, 0); - PUSH_DATA (push, sy << src->ms_x); + PUSH_DATA (push, sy << src->ms_y); return 0; } -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50: fix a SIGSEGV with piglit bin/gl-3.1-vao-broken-attrib
Before validating vertex arrays we need to check if a VBO is present. Checking if vb->buffer is not NULL fixes the issue. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 1fd33b8..3d200bd 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -382,6 +382,11 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) if (nv50->vbo_user & (1 << b)) { address = addrs[b] + ve->pipe.src_offset; limit = addrs[b] + limits[b]; + } else + if (!vb->buffer) { + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1); + PUSH_DATA (push, 0); + continue; } else { struct nv04_resource *buf = nv04_resource(vb->buffer); if (!(refd & (1 << b))) { -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params
What piglit test does this fix? On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin wrote: > Signed-off-by: Ilia Mirkin > Cc: mesa-sta...@lists.freedesktop.org > --- > > Even though in practice a geometry program will never be using UCP's, > we still were revalidating (aka recompiling) the program when more > clip planes became enabled (which also are used for regular clip > distances). > > This seems like it should have led to massive fail, but I guess you > don't change the number of clip planes when using geometry shaders. > But I'm going to put this through a full piglit run just in case > there's something I'm missing. > > src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > index 785e52e..11f2b10 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c > @@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0, >nvc0_vertprog_validate(nvc0); > else > if (likely(vp == nvc0->gmtyprog)) > - nvc0_vertprog_validate(nvc0); > + nvc0_gmtyprog_validate(nvc0); > else >nvc0_tevlprog_validate(nvc0); > } > -- > 2.3.6 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > -- Best regards, Samuel Pitoiset. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] nv50: turn samples counts off during blit
Fixes the following piglit test: occlusion_query_meta_no_fragments Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_surface.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index dc9852d..66eccc2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -1432,6 +1432,7 @@ static void nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; boolean eng3d = FALSE; if (util_format_is_depth_or_stencil(info->dst.resource->format)) { @@ -1493,10 +1494,20 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) info->src.box.height != -info->dst.box.height)) eng3d = TRUE; + if (nv50->screen->num_occlusion_queries_active) { + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } + if (!eng3d) nv50_blit_eng2d(nv50, info); else nv50_blit_3d(nv50, info); + + if (nv50->screen->num_occlusion_queries_active) { + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } } static void -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nv50: add nesting support for occlusion queries
This is loosely based on nvc0. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 27 -- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 2 ++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 81f7474..80d3fd2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -49,6 +49,7 @@ struct nv50_query { uint32_t offset; /* base + i * 32 */ uint8_t state; boolean is64bit; + int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; @@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - PUSH_SPACE(push, 4); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 1); + q->nesting = nv50->screen->num_occlusion_queries_active++; + if (q->nesting) { + nv50_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nv50_query_get(push, q, 0x10, 0x06805002); @@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: nv50_query_get(push, q, 0, 0x0100f002); - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 0); + if (--nv50->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nv50_query_get(push, q, 0, 0x06805002); @@ -396,8 +404,7 @@ 
nv50_render_condition(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { -/* XXX: Placeholder, handle nesting here if available */ -if (unlikely(false)) +if (unlikely(q->nesting)) cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; else diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 881051b..3a12a1f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -54,6 +54,8 @@ struct nv50_screen { struct nv50_context *cur_ctx; struct nv50_graph_state save_state; + int num_occlusion_queries_active; + struct nouveau_bo *code; struct nouveau_bo *uniforms; struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50: limit the maximum number of samplers to 16
NV50_3D_BIND_TSC only allows binding 16 samplers, and since we don't want to do anything with NV50_3D_BIND_TSC2, just limit the maximum number of samplers to 16 like for nvc0. This fixes the dmesg-fail result of the following piglit test: max-samplers. However, the test itself still fails. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6583a35..46ae0b8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -286,7 +286,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: /* The chip could handle more sampler views than samplers */ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - return MIN2(32, PIPE_MAX_SAMPLERS); + return MIN2(16, PIPE_MAX_SAMPLERS); case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nv50: add nesting support for occlusion queries
This is loosely based on nvc0. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 29 -- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 2 ++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 81f7474..a5b95c1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -49,6 +49,7 @@ struct nv50_query { uint32_t offset; /* base + i * 32 */ uint8_t state; boolean is64bit; + int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; }; @@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: - PUSH_SPACE(push, 4); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 1); + q->nesting = nv50->screen->num_occlusion_queries_active++; + if (q->nesting) { + nv50_query_get(push, q, 0x10, 0x0100f002); + } else { + PUSH_SPACE(push, 4); + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 1); + } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nv50_query_get(push, q, 0x10, 0x06805002); @@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: nv50_query_get(push, q, 0, 0x0100f002); - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); - PUSH_DATA (push, 0); + if (--nv50->screen->num_occlusion_queries_active == 0) { + PUSH_SPACE(push, 2); + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); + PUSH_DATA (push, 0); + } break; case PIPE_QUERY_PRIMITIVES_GENERATED: nv50_query_get(push, q, 0, 0x06805002); @@ -319,7 +327,7 @@ 
nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, res8[0] = TRUE; break; case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ - res64[0] = q->data[1]; + res64[0] = q->data[1] - q->data[5]; break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ @@ -396,8 +404,7 @@ nv50_render_condition(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: if (likely(!condition)) { -/* XXX: Placeholder, handle nesting here if available */ -if (unlikely(false)) +if (unlikely(q->nesting)) cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : NV50_3D_COND_MODE_ALWAYS; else diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 881051b..3a12a1f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -54,6 +54,8 @@ struct nv50_screen { struct nv50_context *cur_ctx; struct nv50_graph_state save_state; + int num_occlusion_queries_active; + struct nouveau_bo *code; struct nouveau_bo *uniforms; struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] nv50/nvc0: force cache flush for constbufs
This fixes the following piglit test: ext_transform_feedback-immediate-reuse-uniform-buffer I didn't test on nvc0 but this should work as expected. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 2 ++ src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index c698782..932d1c3 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -108,6 +108,8 @@ nv50_constbufs_validate(struct nv50_context *nv50) } } } + + nv50->cb_dirty = 1; /* Force cache flush for constbufs */ } static boolean diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 785e52e..f8a30f2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -464,6 +464,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } } } + + nvc0->cb_dirty = 1; /* Force cache flush for constbufs */ } static void -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nouveau: always align buffers to 0x100
Only constbufs must be aligned to 0x100, but since a TFB buffer can be rebound as a constant buffer, it must also be aligned. This patch handles that case by aligning all buffer sizes to 256-byte increments at buffer creation. This fixes the dmesg-fail result of the following piglit test: ext_transform_feedback-immediate-reuse-uniform-buffer -auto -fbo Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nouveau_buffer.c | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index 09cdbb5..83d5288 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -40,12 +40,7 @@ static INLINE boolean nouveau_buffer_allocate(struct nouveau_screen *screen, struct nv04_resource *buf, unsigned domain) { - uint32_t size = buf->base.width0; - - if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER | - PIPE_BIND_COMPUTE_RESOURCE | - PIPE_BIND_SHADER_RESOURCE)) - size = align(size, 0x100); + uint32_t size = align(buf->base.width0, 0x100); if (domain == NOUVEAU_BO_VRAM) { buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size, -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0: add a missing parameter to nvc0_set_shader_images()
This fixes a compilation warning introduced in commit 05a12c5 (gallium: add interface for writable shader images). While we are at it, fix indentation and rename parameters according to the gallium interface. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 337559c..d18b064 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -1125,9 +1125,9 @@ nvc0_set_compute_resources(struct pipe_context *pipe, } static void -nvc0_set_shader_images(struct pipe_context *pipe, - unsigned start, unsigned nr, - struct pipe_image_view **views) +nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader, + unsigned start_slot, unsigned count, + struct pipe_image_view **views) { #if 0 nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views); -- 2.4.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [PATCH] nv50: adjust min/max lod by base level on G80
Reviewed-by: Samuel Pitoiset On 07/20/2015 09:26 AM, Ilia Mirkin wrote: Make the assumption that there's a 1:1 TIC <-> TSC connection, and increase min/max lod by the relevant texture's base level. Also if there's no mipfilter, we have to enable it while forcing min/max lod to the base level. This fixes many, but not all, tex-miplevel-selection tests on G80. Signed-off-by: Ilia Mirkin --- All the textureLod tests fail. If I also adjust the lod_bias by the first_level, then the regular tests start failing. Not sure what the right move is here... need to trace the blob to see what it does here. src/gallium/drivers/nouveau/nv50/nv50_state.c | 1 + .../drivers/nouveau/nv50/nv50_stateobj_tex.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_tex.c| 39 ++ 3 files changed, 41 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index d4d41af..98c4c3a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -464,6 +464,7 @@ nv50_sampler_state_create(struct pipe_context *pipe, struct nv50_tsc_entry *so = MALLOC_STRUCT(nv50_tsc_entry); float f[2]; + so->pipe = *cso; so->id = -1; so->tsc[0] = (0x00026000 | diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h index 99548cb..9a19166 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h @@ -5,6 +5,7 @@ #include "pipe/p_state.h" struct nv50_tsc_entry { + struct pipe_sampler_state pipe; int id; uint32_t tsc[8]; }; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index 17ae27f..d79c813 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -344,6 +344,45 @@ nv50_validate_tsc(struct nv50_context *nv50, int s) PUSH_DATA (push, (i << 4) | 0); continue; } + if 
(nv50->base.screen->class_3d == NV50_3D_CLASS) { + struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]); + + /* We must make sure that the MIN_LOD is at least set to the first + * level for the G80 + */ + bool need_update = false; + float min_lod = CLAMP( + tic->pipe.u.tex.first_level + tsc->pipe.min_lod, 0.0f, 15.0f); + float max_lod = CLAMP( + tic->pipe.u.tex.first_level + tsc->pipe.max_lod, 0.0f, 15.0f); + + if (tsc->pipe.min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { +uint32_t old_tsc1 = tsc->tsc[1]; +tsc->tsc[1] &= ~NV50_TSC_1_MIPF__MASK; +if (tic->pipe.u.tex.first_level) { + tsc->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + max_lod = min_lod = tic->pipe.u.tex.first_level; +} +if (tsc->tsc[1] != old_tsc1) + need_update = true; + } + + uint32_t new_tsc2 = +(((int)(max_lod * 256.0f) & 0xfff) << 12) | +((int)(min_lod * 256.0f) & 0xfff); + if ((tsc->tsc[2] & 0xff) != new_tsc2) { +tsc->tsc[2] &= ~0xffu; +tsc->tsc[2] |= new_tsc2; +need_update = true; + } + + if (need_update && tsc->id >= 0) { +nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc, +65536 + tsc->id * 32, +NOUVEAU_BO_VRAM, 32, tsc->tsc); +need_flush = TRUE; + } + } if (tsc->id < 0) { tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nv50: force cache flush for Uniform Buffer Objects
This fixes the following piglit test: ext_transform_feedback-immediate-reuse-uniform-buffer Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 1ec5642..9369093 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -99,6 +99,8 @@ nv50_constbufs_validate(struct nv50_context *nv50) PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); + + nv50->cb_dirty = 1; /* Force cache flush for UBO. */ } else { BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (i << 8) | p | 0); -- 2.4.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0: force cache flush when binding a new ubo
This fixes the following piglit test: ext_transform_feedback-immediate-reuse-uniform-buffer Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index b07558a..2428314 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -455,6 +455,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) PUSH_DATA (push, (i << 4) | 1); BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); + + nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ } else { BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (i << 4) | 0); -- 2.4.6 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/7] nouveau: implement the nvif hardware performance counters interface
On 07/22/2015 10:29 PM, Martin Peres wrote: On 01/07/15 01:01, Samuel Pitoiset wrote: This commit implements the base interface for hardware performance counters that will be shared between nv50 and nvc0 drivers. TODO: Bump libdrm version of mesa when nvif will be merged. Changes since v2: - remove double-query thing for domains, signals and sources Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nouveau_perfmon.c | 290 ++ src/gallium/drivers/nouveau/nouveau_perfmon.h | 58 ++ src/gallium/drivers/nouveau/nouveau_screen.c | 5 + src/gallium/drivers/nouveau/nouveau_screen.h | 1 + 5 files changed, 356 insertions(+) create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.c create mode 100644 src/gallium/drivers/nouveau/nouveau_perfmon.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 3fae3bc..3da0bdc 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -10,6 +10,8 @@ C_SOURCES := \ nouveau_heap.h \ nouveau_mm.c \ nouveau_mm.h \ +nouveau_perfmon.c \ +nouveau_perfmon.h \ nouveau_screen.c \ nouveau_screen.h \ nouveau_statebuf.h \ diff --git a/src/gallium/drivers/nouveau/nouveau_perfmon.c b/src/gallium/drivers/nouveau/nouveau_perfmon.c new file mode 100644 index 000..e1d4546 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_perfmon.c @@ -0,0 +1,290 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "util/u_memory.h" + +#include "nouveau_debug.h" +#include "nouveau_winsys.h" +#include "nouveau_perfmon.h" + +static int +nouveau_perfmon_query_sources(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom, + struct nouveau_perfmon_sig *sig) +{ + struct nvif_perfmon_query_source_v0 args = {}; + + args.iter = 1; Why start iterating from 1 and not 0? Starting from 1 will give you the first source of the signal (because -1 is performed on the kernel side). + args.domain = dom->id; + args.signal = sig->signal; + do { + struct nouveau_perfmon_src *src; + int ret; + + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SOURCE, +&args, sizeof(args)); + if (ret) + return ret; You do not check what happens if you do not expose any source for this signal. A test on args.iter != 0x with a return if not the case would be nice! If no sources are exposed for a signal, it will return -EINVAL. But we don't care to handle this because we check if a signal exposes sources before querying them. (cf. perfmon_query_signals()). 
+ + src = CALLOC_STRUCT(nouveau_perfmon_src); + if (!src) + return -ENOMEM; + +#if 0 + debug_printf("id = %d\n", args.source); + debug_printf("name = %s\n", args.name); + debug_printf("mask = %08x\n", args.mask); + debug_printf("\n"); +#endif + + src->id = args.source; + strncpy(src->name, args.name, sizeof(src->name)); + list_addtail(&src->head, &sig->sources); + } while (args.iter != 0xff); + + return 0; +} + +static int +nouveau_perfmon_query_signals(struct nouveau_perfmon *pm, + struct nouveau_perfmon_dom *dom) +{ + struct nvif_perfmon_query_signal_v0 args = {}; + + args.iter = 1; + args.domain = dom->id; + do { + struct nouveau_perfmon_sig *sig; + int ret; + + ret = nouveau_object_mthd(pm->object, NVIF_PERFMON_V0_QUERY_SIGNAL, +&
Re: [Mesa-dev] [PATCH v2 2/7] nv50: allocate a software object class
On 07/22/2015 10:35 PM, Martin Peres wrote: On 01/07/15 01:01, Samuel Pitoiset wrote: This will allow to monitor global performance counters through the command stream of the GPU instead of using ioctls. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 11 +++ src/gallium/drivers/nouveau/nv50/nv50_screen.h | 1 + src/gallium/drivers/nouveau/nv50/nv50_winsys.h | 1 + 3 files changed, 13 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 6583a35..c985344 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -367,6 +367,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_object_del(&screen->eng2d); nouveau_object_del(&screen->m2mf); nouveau_object_del(&screen->sync); + nouveau_object_del(&screen->sw); nouveau_screen_fini(&screen->base); @@ -437,6 +438,9 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->tesla->handle); + BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); + PUSH_DATA (push, screen->sw->handle); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); @@ -768,6 +772,13 @@ nv50_screen_create(struct nouveau_device *dev) goto fail; } + ret = nouveau_object_new(chan, 0xbeef506e, 0x506e, I guess the 0x506e needs to be defined in libdrm, right? According to nvc0, the handle is not defined in libdrm. No need to do that I think, isn't it? 
Other than that, it is Reviewed-by: Martin Peres +NULL, 0, &screen->sw); + if (ret) { + NOUVEAU_ERR("Failed to allocate SW object: %d\n", ret); + goto fail; + } + ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS, NULL, 0, &screen->m2mf); if (ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 881051b..69fdfdb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -93,6 +93,7 @@ struct nv50_screen { struct nouveau_object *tesla; struct nouveau_object *eng2d; struct nouveau_object *m2mf; + struct nouveau_object *sw; }; static INLINE struct nv50_screen * diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index e8578c8..5cb33ef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -60,6 +60,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_COMPUTE(m) 6, (m) #define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) +#define SUBC_SW(m) 7, (m) static INLINE uint32_t NV50_FIFO_PKHDR(int subc, int mthd, unsigned size) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 4/7] nv50: configure the ring buffer for reading back PM counters
On 07/22/2015 10:54 PM, Martin Peres wrote: On 01/07/15 01:01, Samuel Pitoiset wrote: To write data at the right offset, the kernel has to know some parameters of this ring buffer, like the number of domains and the maximum number of queries. Changes since v2: - only configure the ring buffer if the notifier BO is allocated - only use one BEGIN_NV04() Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_screen.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ab95d65..335bff1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -442,6 +442,16 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) BEGIN_NV04(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->sw->handle); + if (screen->query) { + /* Do not need to configure the ring buffer used to read back + * global performance counters when it is not allocated. */ + BEGIN_NV04(push, SUBC_SW(0x0190), 1); + PUSH_DATA (push, screen->query->handle); + BEGIN_NV04(push, SUBC_SW(0x0600), 2); Shouldn't we have the sw method's handle defined in libdrm? Same as patch 2. No need to do it I think. Anyway, patches 3 and 4 are Reviewed-by: Martin Peres + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_MAX_QUERIES); + PUSH_DATA (push, NV50_HW_PM_RING_BUFFER_NUM_DOMAINS); + } + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS); ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 5/7] nv50: add support for compute/graphics global performance counters
On 07/23/2015 12:05 AM, Martin Peres wrote: On 01/07/15 01:01, Samuel Pitoiset wrote: This commit adds support for both compute and graphics global performance counters which have been reverse engineered with CUPTI (Linux) and PerfKit (Windows). Currently, only one query type can be monitored at the same time because the Gallium's HUD doesn't fit pretty well. This will be improved later. Changes since v2: - replace \% by percentage - remove one extra call to PUSH_SPACE - use nouveau_fence instead of my hand-made fence mechanism Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 1066 +++- src/gallium/drivers/nouveau/nv50/nv50_screen.h | 35 + 2 files changed, 1096 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 81f7474..7fb6f3a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -27,6 +27,8 @@ #include "nv50/nv50_context.h" #include "nv_object.xml.h" +#include "nouveau_perfmon.h" + #define NV50_QUERY_STATE_READY 0 #define NV50_QUERY_STATE_ACTIVE 1 #define NV50_QUERY_STATE_ENDED 2 @@ -51,10 +53,25 @@ struct nv50_query { boolean is64bit; struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; + struct nouveau_object *perfdom; }; #define NV50_QUERY_ALLOC_SPACE 256 +#ifdef DEBUG No need to guard the definition of this function. The compiler will get rid of it if it has no users. Fixed. 
+static void nv50_hw_pm_dump_perfdom(struct nvif_perfdom_v0 *args); +#endif + +static boolean +nv50_hw_pm_query_create(struct nv50_context *, struct nv50_query *); +static void +nv50_hw_pm_query_destroy(struct nv50_context *, struct nv50_query *); +static boolean +nv50_hw_pm_query_begin(struct nv50_context *, struct nv50_query *); +static void nv50_hw_pm_query_end(struct nv50_context *, struct nv50_query *); +static boolean nv50_hw_pm_query_result(struct nv50_context *, +struct nv50_query *, boolean, void *); + static INLINE struct nv50_query * nv50_query(struct pipe_query *pipe) { @@ -96,9 +113,15 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) static void nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) { - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_query *q = nv50_query(pq); + + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) + nv50_hw_pm_query_destroy(nv50, q); + + nv50_query_allocate(nv50, q, 0); + nouveau_fence_ref(NULL, &q->fence); + FREE(q); } static struct pipe_query * @@ -120,6 +143,12 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) type == PIPE_QUERY_PRIMITIVES_EMITTED || type == PIPE_QUERY_SO_STATISTICS || type == PIPE_QUERY_PIPELINE_STATISTICS); + if (type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST) { + /* Hardware global performance counters are not 64 bits, but we also use + * a fence to make sure the query is ready. */ I do not understand the logic of this comment. Only 64-bits queries use a nouveau_fence to make sure result is available. 32-bits queries use a hand-made sequence number. Global PM are declared as 32-bits queries but we also use a nouveau_fence to check the result. I'll rewrite that comment. 
+ q->is64bit = TRUE; + } + q->type = type; if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { @@ -127,6 +156,11 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ } + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST)) { + if (!nv50_hw_pm_query_create(nv50, q)) + return NULL; + } + return (struct pipe_query *)q; } @@ -151,6 +185,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_query *q = nv50_query(pq); + boolean ret = TRUE; /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- @@ -205,10 +240,13 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) nv50_query_get(push, q, 0x10, 0x5002); break; default: + if ((q->type >= NV50_HW_PM_QUERY(0) && q->type
Re: [Mesa-dev] [PATCH v2 7/7] nv50: enable GL_AMD_performance_monitor
On 07/23/2015 12:14 AM, Martin Peres wrote: On 01/07/15 01:01, Samuel Pitoiset wrote: This exposes a group of global performance counters that enables GL_AMD_performance_monitor. All piglit tests are okay. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 35 ++ src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.h | 6 + 3 files changed, 42 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 7dadb77..6d57305 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -1563,6 +1563,7 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, info->name = cfg->event->name; info->query_type = NV50_HW_PM_QUERY(id); + info->group_id = NV50_HW_PM_QUERY_GROUP; info->max_value.u64 = (cfg->event->display == NV50_HW_PM_EVENT_DISPLAY_RATIO) ? 100 : 0; return 1; @@ -1573,6 +1574,40 @@ nv50_screen_get_driver_query_info(struct pipe_screen *pscreen, return 0; } +int +nv50_screen_get_driver_query_group_info(struct pipe_screen *pscreen, +unsigned id, +struct pipe_driver_query_group_info *info) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + int count = 0; + + // TODO: Check DRM version when nvif will be merged in libdrm! + if (screen->base.perfmon) { + count++; /* NV50_HW_PM_QUERY_GROUP */ + } + + if (!info) + return count; + + if (id == NV50_HW_PM_QUERY_GROUP) { + if (screen->base.perfmon) { + info->name = "Global performance counters"; + info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + info->num_queries = NV50_HW_PM_QUERY_COUNT; + info->max_active_queries = 1; /* TODO: get rid of this limitation! 
*/ + return 1; + } + } + + /* user asked for info about non-existing query group */ + info->name = "this_is_not_the_query_group_you_are_looking_for"; + info->max_active_queries = 0; + info->num_queries = 0; + info->type = 0; + return 0; +} + void nv50_init_query_functions(struct nv50_context *nv50) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ac1acd1..05f921d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -749,6 +749,7 @@ nv50_screen_create(struct nouveau_device *dev) pscreen->get_shader_param = nv50_screen_get_shader_param; pscreen->get_paramf = nv50_screen_get_paramf; pscreen->get_driver_query_info = nv50_screen_get_driver_query_info; + pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info; nv50_screen_init_resource_functions(pscreen); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 69127c0..807ae0e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -114,6 +114,9 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } +/* Hardware global performance counters groups. */ +#define NV50_HW_PM_QUERY_GROUP 0 + /* Hardware global performance counters. */ #define NV50_HW_PM_QUERY_COUNT 24 #define NV50_HW_PM_QUERY(i)(PIPE_QUERY_DRIVER_SPECIFIC + (i)) @@ -146,6 +149,9 @@ nv50_screen(struct pipe_screen *screen) int nv50_screen_get_driver_query_info(struct pipe_screen *, unsigned, struct pipe_driver_query_info *); +int nv50_screen_get_driver_query_group_info(struct pipe_screen *, unsigned, +struct pipe_driver_query_group_info *); + boolean nv50_blitter_create(struct nv50_screen *); void nv50_blitter_destroy(struct nv50_screen *); Everything looks good to me! Excellent work Samuel! Reviewed-by: Martin Peres Thanks for reviewing all the series martin. 
IIRC, the kernel patches were supposed to land in 4.2, but as there was no pull request from Ben, they will likely end up in 4.3. As for the libdrm patches, did you review them? Looking forward to seeing this series merged! Yeah, I hope they will be merged in 4.3. Anyway, Ben still has to review the software methods interface and add nvif support to libdrm. I'll ping him in the next few days. :) It would also be nice to start a discussion to rework the Gallium HUD to avoid the stupid problem that we have where we can only monitor one signal at a time! Yeah, this is going to be a bit hard to rework but that's required to monitor more than one hardware global performance counter.
Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
On 10/10/2015 09:58 PM, Ilia Mirkin wrote: On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset wrote: On 10/10/2015 09:42 PM, Ilia Mirkin wrote: On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset wrote: This patch looks fine except that it should be a bit more normalized. I mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for PUSH_SPACE calls, sometimes you add them, sometimes not. Meh. We need to get our error checking situation straight, but this isn't the patch to do it in. Yeah, but this needs to be clarified. What does? I mean, we should either use PUSH_SPACE everywhere or not at all, and always break (or not) when PUSH_SPACE fails. That's really a minor issue. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
On 10/10/2015 10:17 PM, Ilia Mirkin wrote: On Sat, Oct 10, 2015 at 4:21 PM, Samuel Pitoiset wrote: On 10/10/2015 09:58 PM, Ilia Mirkin wrote: On Sat, Oct 10, 2015 at 3:55 PM, Samuel Pitoiset wrote: On 10/10/2015 09:42 PM, Ilia Mirkin wrote: On Sat, Oct 10, 2015 at 3:41 PM, Samuel Pitoiset wrote: This patch looks fine except that it should be a bit more normalized. I mean, sometimes you break when PUSH_SPACE fails, sometimes not. Same for PUSH_SPACE calls, sometimes you add them, sometimes not. Meh. We need to get our error checking situation straight, but this isn't the patch to do it in. Yeah, but this needs to be clarified. What does? I mean, we should either use PUSH_SPACE everywhere or not at all, and always break (or not) when PUSH_SPACE fails. That's really a minor issue. It's actually a major issue. Error-handling is practically non-existent. There are a couple of spots here and there, but it doesn't really scale up. I guess I (semi-)accidentally removed a couple of spots that did error checking, but, again, meh. Doing this for real will require some careful thought. Yeah, okay. So we really need to improve error-handling. :) -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Nouveau] [PATCH] nv50, nvc0: don't base decisions on available pushbuf space
I did a full piglit run on Fermi. There are no regressions and you fixed texelFetch tests and other ones which failed with that assert. I'm lazy to do it on Tesla, so: Reviewed-by: Samuel Pitoiset Thanks! On 10/10/2015 11:09 AM, Ilia Mirkin wrote: We still have to push everything out, might as well kick earlier and flip pushbufs when we know we'll need it. This resolves some issues with the new policy of making sure that we always leave a bit of room at the end for fences. Signed-off-by: Ilia Mirkin Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/nouveau/nv50/nv50_shader_state.c | 9 ++--- src/gallium/drivers/nouveau/nv50/nv50_transfer.c | 16 +++- src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 20 +--- 3 files changed, 10 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index fdde11f..941555f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50) PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); } while (words) { - unsigned nr; - - if (!PUSH_SPACE(push, 16)) - break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); + PUSH_SPACE(push, nr + 3); BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (start << 8) | b); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index be51407..9a3fd1e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, PUSH_DATA (push, 0); while (count) { - unsigned nr; - - if (!PUSH_SPACE(push, 16)) - break; - nr = PUSH_AVAIL(push); - assert(nr 
>= 16); - nr = MIN2(count, nr - 1); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); + unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); PUSH_DATAp(push, src, nr); @@ -395,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (words) { - unsigned nr; - - nr = PUSH_AVAIL(push); - nr = MIN2(nr - 7, words); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN); + PUSH_SPACE(push, nr + 7); BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c index aaec60a..d459dd6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (count) { - unsigned nr; + unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN); - if (!PUSH_SPACE(push, 16)) + if (!PUSH_SPACE(push, nr + 9)) break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(count, nr - 9); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN); BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); PUSH_DATAh(push, dst->offset + offset); @@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv, nouveau_pushbuf_validate(push); while (count) { - unsigned nr; + unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1)); - if (!PUSH_SPACE(push, 16)) + if (!PUSH_SPACE(push, nr + 10)) break; - nr = PUSH_AVAIL(push); - assert(nr >= 16); - nr = MIN2(count, nr - 8); - nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1)); BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, dst->offset + offset); @@ -571,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv, PUSH_DATA (push, bo->offset + base); while (words) { - unsigned nr = PUSH_AVAIL(push); 
- nr = MIN2(nr, words); - nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1); + unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1); PUSH_SPACE(push, nr + 2); PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain); ___ mesa-dev mailing list mesa
[Mesa-dev] [PATCH 06/16] nvc0: allow to use 8 MP counters on Fermi
On Fermi, we have one domain of 8 MP counters while we have two domains of 4 MP counters on Kepler. Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 30 +- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h| 2 +- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index b810d25..2060662 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -144,7 +144,7 @@ struct nvc0_hw_sm_counter_cfg struct nvc0_hw_sm_query_cfg { - struct nvc0_hw_sm_counter_cfg ctr[4]; + struct nvc0_hw_sm_counter_cfg ctr[8]; uint8_t num_counters; uint8_t op; uint8_t norm[2]; /* normalization num,denom */ @@ -418,7 +418,6 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); const struct nvc0_hw_sm_query_cfg *cfg; unsigned i, c; - unsigned num_ab[2] = { 0, 0 }; if (screen->base.class_3d >= NVE4_3D_CLASS) return nve4_hw_sm_begin_query(nvc0, hq); @@ -426,17 +425,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq); /* check if we have enough free counter slots */ - for (i = 0; i < cfg->num_counters; ++i) - num_ab[cfg->ctr[i].sig_dom]++; - - if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 || - screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) { + if (screen->pm.num_hw_sm_active[0] + cfg->num_counters > 8) { NOUVEAU_ERR("Not enough free MP counter slots !\n"); return false; } - assert(cfg->num_counters <= 4); - PUSH_SPACE(push, 4 * 8 * 6 + 4); + assert(cfg->num_counters <= 8); + PUSH_SPACE(push, 4 * 8 * 6 + 2); /* set sequence field to 0 (used to check if result is available) */ for (i = 0; i < screen->mp_count; ++i) { @@ -446,23 +441,21 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) hq->sequence++; for (i = 0; i < 
cfg->num_counters; ++i) { - const unsigned d = cfg->ctr[i].sig_dom; unsigned s; - if (!screen->pm.num_hw_sm_active[d]) { + if (!screen->pm.num_hw_sm_active[0]) { BEGIN_NVC0(push, SUBC_SW(0x0600), 1); PUSH_DATA (push, 0x8000); } - screen->pm.num_hw_sm_active[d]++; + screen->pm.num_hw_sm_active[0]++; - for (c = d * 4; c < (d * 4 + 4); ++c) { + for (c = 0; c < 8; ++c) { if (!screen->pm.mp_counter[c]) { hsq->ctr[i] = c; screen->pm.mp_counter[c] = hsq; break; } } - assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */ /* configure and reset the counter(s) */ for (s = 0; s < cfg->ctr[i].num_src; s++) { @@ -522,7 +515,8 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) /* release counters for this query */ for (c = 0; c < 8; ++c) { if (screen->pm.mp_counter[c] == hsq) { - screen->pm.num_hw_sm_active[c / 4]--; + uint8_t d = is_nve4 ? c / 4 : 0; /* only one domain for NVC0:NVE4 */ + screen->pm.num_hw_sm_active[d]--; screen->pm.mp_counter[c] = NULL; } } @@ -568,7 +562,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) } static inline bool -nvc0_hw_sm_query_read_data(uint32_t count[32][4], +nvc0_hw_sm_query_read_data(uint32_t count[32][8], struct nvc0_context *nvc0, bool wait, struct nvc0_hw_query *hq, const struct nvc0_hw_sm_query_cfg *cfg, @@ -594,7 +588,7 @@ nvc0_hw_sm_query_read_data(uint32_t count[32][4], } static inline bool -nve4_hw_sm_query_read_data(uint32_t count[32][4], +nve4_hw_sm_query_read_data(uint32_t count[32][8], struct nvc0_context *nvc0, bool wait, struct nvc0_hw_query *hq, const struct nvc0_hw_sm_query_cfg *cfg, @@ -640,7 +634,7 @@ static boolean nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query *hq, boolean wait, union pipe_query_result *result) { - uint32_t count[32][4]; + uint32_t count[32][8]; uint64_t value = 0; unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32); unsigned p, c; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h index bab6f34..0ad8a91 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.h @@ -5,7 +5,7 @@ struct nvc0_hw_sm_q
[Mesa-dev] [PATCH 02/16] nvc0: split out begin_query() hook used by MP counters
The way we configure MP performance counters is going to be pretty different between Fermi and Kepler. Having two separate functions is much better. Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 108 - 1 file changed, 84 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 8e2239f..f83966a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -338,16 +338,91 @@ nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) } static boolean +nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) +{ + struct nvc0_screen *screen = nvc0->screen; + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); + const struct nvc0_hw_sm_query_cfg *cfg; + unsigned i, c; + unsigned num_ab[2] = { 0, 0 }; + + cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq); + + /* check if we have enough free counter slots */ + for (i = 0; i < cfg->num_counters; ++i) + num_ab[cfg->ctr[i].sig_dom]++; + + if (screen->pm.num_hw_sm_active[0] + num_ab[0] > 4 || + screen->pm.num_hw_sm_active[1] + num_ab[1] > 4) { + NOUVEAU_ERR("Not enough free MP counter slots !\n"); + return false; + } + + assert(cfg->num_counters <= 4); + PUSH_SPACE(push, 4 * 8 * + 6); + + if (!screen->pm.mp_counters_enabled) { + screen->pm.mp_counters_enabled = true; + BEGIN_NVC0(push, SUBC_SW(0x06ac), 1); + PUSH_DATA (push, 0x1fcb); + } + + /* set sequence field to 0 (used to check if result is available) */ + for (i = 0; i < screen->mp_count; ++i) + hq->data[i * 10 + 10] = 0; + hq->sequence++; + + for (i = 0; i < cfg->num_counters; ++i) { + const unsigned d = cfg->ctr[i].sig_dom; + + if (!screen->pm.num_hw_sm_active[d]) { + uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); + if (screen->pm.num_hw_sm_active[!d]) +m |= 1 << (7 + (8 * d)); + BEGIN_NVC0(push, 
SUBC_SW(0x0600), 1); + PUSH_DATA (push, m); + } + screen->pm.num_hw_sm_active[d]++; + + for (c = d * 4; c < (d * 4 + 4); ++c) { + if (!screen->pm.mp_counter[c]) { +hsq->ctr[i] = c; +screen->pm.mp_counter[c] = hsq; +break; + } + } + assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */ + + /* configure and reset the counter(s) */ + if (d == 0) +BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); + else +BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); + PUSH_DATA (push, cfg->ctr[i].sig_sel); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); + PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); + PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); + BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); + PUSH_DATA (push, 0); + } + return true; +} + +static boolean nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) { struct nvc0_screen *screen = nvc0->screen; struct nouveau_pushbuf *push = nvc0->base.pushbuf; - const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); const struct nvc0_hw_sm_query_cfg *cfg; unsigned i, c; unsigned num_ab[2] = { 0, 0 }; + if (screen->base.class_3d >= NVE4_3D_CLASS) + return nve4_hw_sm_begin_query(nvc0, hq); + cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq); /* check if we have enough free counter slots */ @@ -361,7 +436,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) } assert(cfg->num_counters <= 4); - PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 
1 : 6) + 6); + PUSH_SPACE(push, 4 * 8 * 6 + 6); if (!screen->pm.mp_counters_enabled) { screen->pm.mp_counters_enabled = true; @@ -376,6 +451,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) for (i = 0; i < cfg->num_counters; ++i) { const unsigned d = cfg->ctr[i].sig_dom; + unsigned s; if (!screen->pm.num_hw_sm_active[d]) { uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); @@ -396,31 +472,15 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */ /* configure and reset the counter(s) */ - if (is_nve4) { - if (d == 0) -BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); - else -
[Mesa-dev] [PATCH 05/16] nvc0: fix sequence field init for MP counters on Fermi
Sequence fields are located at MP[i] + 0x20 in the buffer object. This is used to check if result is available for MP[i]. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index f7b49da..b810d25 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -439,8 +439,10 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) PUSH_SPACE(push, 4 * 8 * 6 + 4); /* set sequence field to 0 (used to check if result is available) */ - for (i = 0; i < screen->mp_count; ++i) - hq->data[i * 10 + 10] = 0; + for (i = 0; i < screen->mp_count; ++i) { + const unsigned b = (0x24 / 4) * i; + hq->data[b + 8] = 0; + } hq->sequence++; for (i = 0; i < cfg->num_counters; ++i) { -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/16] nvc0: fix queries which use multiple MP counters on Fermi
Queries which use more than one MP counter were misconfigured, and computing the final result was also wrong, because sources need to be configured on different hardware counters instead. According to the blob, computing the result is now as follows: FOR i..n val += ctr[i] * pow(2, i) Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 128 + 1 file changed, 81 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 2060662..99e9073 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -131,7 +131,7 @@ struct nvc0_hw_sm_counter_cfg uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */ uint32_t sig_sel : 8; /* signal group */ - uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */ + uint32_t src_sel; /* signal selection for up to 4 sources */ }; #define NVC0_COUNTER_OPn_SUM0 @@ -280,44 +280,82 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = 0x80001de7ULL }; -#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } +#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s } +#define _Q(n, c, ...) 
[NVC0_HW_SM_QUERY_##n] = { \ + { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\ +} static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = { - _Q(ACTIVE_CYCLES, 0x, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(ACTIVE_WARPS,0x, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), - _Q(ATOM_COUNT, 0x, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(BRANCH, 0x, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), - _Q(DIVERGENT_BRANCH,0x, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), - _Q(GLD_REQUEST, 0x, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(GRED_COUNT, 0x, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(GST_REQUEST, 0x, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_EXECUTED, 0x, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), - _Q(INST_ISSUED1_0, 0x, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED1_1, 0x, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED2_0, 0x, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED2_1, 0x, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LOCAL_LD,0x, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LOCAL_ST,0x, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_0, 0x, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_1, 0x, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_2, 0x, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_3, 0x, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_4, 0x, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_5, 0x, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_6, 0x, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_7, 0x, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(SHARED_LD, 0x, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(SHARED_ST, 0x, 
LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(THREADS_LAUNCHED,0x, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), - _Q(TH_INST_EXECUTED_0, 0x, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_1, 0x, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_2, 0x, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_3, 0x, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(WARPS_LAUNCHED, 0x, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), + _Q(ACTIVE_CYCLES, 1, _C(0x, LOGOP, 0x11, 0x)), + _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x0010), + _C(0x, LOGOP, 0x24, 0x0021), + _C(0x, LOGOP, 0x24, 0x0032), + _C(0
[Mesa-dev] [PATCH 01/16] nvc0: remove useless call to query_get_cfg() in nvc0_hw_sm_query_end()
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 3bdb90a..8e2239f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -439,9 +439,6 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint grid[3] = { screen->mp_count, 1, 1 }; unsigned c; - const struct nvc0_hw_sm_query_cfg *cfg; - - cfg = nvc0_hw_sm_query_get_cfg(nvc0, hq); if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); @@ -495,6 +492,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) PUSH_SPACE(push, 16); mask = 0; for (c = 0; c < 8; ++c) { + const struct nvc0_hw_sm_query_cfg *cfg; unsigned i; hsq = screen->pm.mp_counter[c]; -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/16] nvc0: correctly enable the MP counters' multiplexer on Fermi
Writing 0x408000 to 0x419e00 (like on Kepler) has no effect on Fermi because we only have one domain of 8 counters. Instead, we have to write 0x8000. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index a74bfee..f7b49da 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -448,11 +448,8 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) unsigned s; if (!screen->pm.num_hw_sm_active[d]) { - uint32_t m = (1 << 22) | (1 << (7 + (8 * !d))); - if (screen->pm.num_hw_sm_active[!d]) -m |= 1 << (7 + (8 * d)); BEGIN_NVC0(push, SUBC_SW(0x0600), 1); - PUSH_DATA (push, m); + PUSH_DATA (push, 0x8000); } screen->pm.num_hw_sm_active[d]++; -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/16] nvc0: rip off the kepler MP-enabling logic from the Fermi codepath
Writing 0x1fcb to 0x419eac is definitely not related to MP counters and has no effect on Fermi (although this enables MP counters on Kepler). Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index f83966a..a74bfee 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -436,13 +436,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) } assert(cfg->num_counters <= 4); - PUSH_SPACE(push, 4 * 8 * 6 + 6); - - if (!screen->pm.mp_counters_enabled) { - screen->pm.mp_counters_enabled = true; - BEGIN_NVC0(push, SUBC_SW(0x06ac), 1); - PUSH_DATA (push, 0x1fcb); - } + PUSH_SPACE(push, 4 * 8 * 6 + 4); /* set sequence field to 0 (used to check if result is available) */ for (i = 0; i < screen->mp_count; ++i) -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/16] nvc0: improve MP counters support on Fermi
Hello, This series fixes some issues related to MP performance counters on Fermi. MP counters for GF100/GF110 have also been improved because they are compute capability 2.0 while the other Fermi chipsets are 2.1 and some HW events are different. Compute support is now enabled by default on Fermi because I can't reproduce those weird effects on 3D state. This has probably been fixed as a side effect. Anyway, if someone complains about it, I'll be glad to fix it. This series has been tested with the following apps: - xonotic-glx - heaven - valley - glxgears, glxspheres64 and so on ... And with the following Fermi chipsets: - GF100 - GF108 - GF110 - GF114 - GF116 - GF119 Note that with GF100/GF110, some MP counters are not correctly context-switched and results might be wrong. This is a known issue that we need to fix on the Nouveau side. There are no regressions with piglit. I'll submit another series in the next few days which adds some performance monitoring metrics on Fermi and I'll double check MP counters on Kepler. Thanks. 
Samuel Pitoiset (16): nvc0: remove useless call to query_get_cfg() in nvc0_hw_sm_query_end() nvc0: split out begin_query() hook used by MP counters nvc0: rip off the kepler MP-enabling logic from the Fermi codepath nvc0: correctly enable the MP counters' multiplexer on Fermi nvc0: fix sequence field init for MP counters on Fermi nvc0: allow to use 8 MP counters on Fermi nvc0: fix queries which use multiple MP counters on Fermi nvc0: fix monitoring multiple MP counters queries on Fermi nvc0: fix unaligned mem access when reading MP counters on Fermi nvc0: store the number of GPCs to nvc0_screen nvc0: read MP counters of all GPCs on Fermi nvc0: allow only one active query for the MP counters group nvc0: enable compute support by default on Fermi nvc0: move SW/HW queries info to their respective files nvc0: add MP counters variants for GF100/GF110 nvc0: add a note about MP counters on GF100/GF110 src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 208 +- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 14 + src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h | 3 + .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 809 ++--- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h| 7 +- src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c | 64 ++ src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h | 3 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 8 +- src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 1 + 9 files changed, 834 insertions(+), 283 deletions(-) -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/16] nvc0: allow only one active query for the MP counters group
Because we can't expose the number of hardware counters needed for each different query, we don't want to allow more than one active query simultaneously to avoid failure when the maximum number of counters is reached. Note that these groups of GPU counters are currently only used by AMD_performance_monitor. Like for Kepler, this limits the maximum number of active queries to 1 on Fermi. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 20 +--- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index f8d4ba1..c81b85a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -371,22 +371,20 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, info->name = "MP counters"; info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; + /* Because we can't expose the number of hardware counters needed for + * each different query, we don't want to allow more than one active + * query simultaneously to avoid failure when the maximum number of + * counters is reached. Note that these groups of GPU counters are + * currently only used by AMD_performance_monitor. + */ + info->max_active_queries = 1; + if (screen->base.class_3d == NVE4_3D_CLASS) { info->num_queries = NVE4_HW_SM_QUERY_COUNT; - - /* On NVE4+, each multiprocessor have 8 hardware counters separated - * in two distinct domains, but we allow only one active query - * simultaneously because some of them use more than one hardware - * counter and this will result in an undefined behaviour. */ - info->max_active_queries = 1; /* TODO: handle multiple hw counters */ - return 1; +return 1; } else if (screen->base.class_3d < NVE4_3D_CLASS) { info->num_queries = NVC0_HW_SM_QUERY_COUNT; - -/* On NVC0:NVE4, each multiprocessor have 8 hardware counters - * in a single domain. 
*/ -info->max_active_queries = 8; return 1; } } -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 16/16] nvc0: add a note about MP counters on GF100/GF110
MP counters on GF100/GF110 (compute capability 2.0) are buggy because there is a context-switch problem that we need to fix. Results might be wrong sometimes, be careful! Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index c4b40a6..c5ce3e3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -286,6 +286,11 @@ static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] = #undef _M2B /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ +/* NOTES: + * - MP counters on GF100/GF110 (compute capability 2.0) are buggy + * because there is a context-switch problem that we need to fix. + * Results might be wrong sometimes, be careful! + */ static const char *nvc0_hw_sm_query_names[] = { /* MP counters */ -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 15/16] nvc0: add MP counters variants for GF100/GF110
GF100 and GF110 chipsets are compute capability 2.0, while the other Fermi chipsets are compute capability 2.1. That's why some MP counters are different between these chipsets and we need to handle variants. Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 559 ++--- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h| 1 + 2 files changed, 483 insertions(+), 77 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 174beef..c4b40a6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -298,6 +298,7 @@ static const char *nvc0_hw_sm_query_names[] = "gred_count", "gst_request", "inst_executed", + "inst_issued", "inst_issued1_0", "inst_issued1_1", "inst_issued2_0", @@ -373,82 +374,456 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = }; #define _C(f, o, g, m, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, m, 0, g, s } -#define _Q(n, c, ...) 
[NVC0_HW_SM_QUERY_##n] = { \ - { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\ -} +#define _Q(n, c) [NVC0_HW_SM_QUERY_##n] = c + +/* Compute capability 2.0 (GF100/GF110) */ +static const struct nvc0_hw_sm_query_cfg +nvc0_active_cycles = +{ + .ctr[0] = _C(0x, LOGOP, 0x11, 0x00ff, 0x), + .num_counters = 1, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_active_warps = +{ + .ctr[0] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0010), + .ctr[1] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0020), + .ctr[2] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0030), + .ctr[3] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0040), + .ctr[4] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0050), + .ctr[5] = _C(0x, LOGOP, 0x24, 0x00ff, 0x0060), + .num_counters = 6, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_atom_count = +{ + .ctr[0] = _C(0x, LOGOP, 0x63, 0x00ff, 0x0030), + .num_counters = 1, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_branch = +{ + .ctr[0] = _C(0x, LOGOP, 0x1a, 0x00ff, 0x), + .ctr[1] = _C(0x, LOGOP, 0x1a, 0x00ff, 0x0010), + .num_counters = 2, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_divergent_branch = +{ + .ctr[0] = _C(0x, LOGOP, 0x19, 0x00ff, 0x0020), + .ctr[1] = _C(0x, LOGOP, 0x19, 0x00ff, 0x0030), + .num_counters = 2, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_gld_request = +{ + .ctr[0] = _C(0x, LOGOP, 0x64, 0x00ff, 0x0030), + .num_counters = 1, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_gred_count = +{ + .ctr[0] = _C(0x, LOGOP, 0x63, 0x00ff, 0x0040), + .num_counters = 1, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_gst_request = +{ + .ctr[0] = _C(0x, LOGOP, 0x64, 0x00ff, 0x0060), + .num_counters = 1, + .op = 
NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_inst_executed = +{ + .ctr[0] = _C(0x, LOGOP, 0x2d, 0x, 0x1000), + .ctr[1] = _C(0x, LOGOP, 0x2d, 0x, 0x1010), + .num_counters = 2, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; + +static const struct nvc0_hw_sm_query_cfg +nvc0_inst_issued = +{ + .ctr[0] = _C(0x, LOGOP, 0x27, 0x, 0x7060), + .ctr[1] = _C(0x, LOGOP, 0x27, 0x, 0x7070), + .num_counters = 2, + .op = NVC0_COUNTER_OPn_SUM, + .norm = { 1, 1 }, +}; -static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = -{ - _Q(ACTIVE_CYCLES, 1, _C(0x, LOGOP, 0x11, 0x00ff, 0x)), - _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x00ff, 0x0010), - _C(0x, LOGOP, 0x24, 0x00ff, 0x0020), - _C(0x, LOGOP, 0x24, 0x00ff, 0x0030), - _C(0x, LOGOP, 0x24, 0x00ff, 0x0040), - _C(0x, LOGOP, 0x24, 0x00ff, 0x0050), - _C(0x, LOGOP, 0x24, 0x00ff, 0x0060)), - _Q(ATOM_COUNT, 1, _C(0x, LOGOP, 0x6
[Mesa-dev] [PATCH 11/16] nvc0: read MP counters of all GPCs on Fermi
When a card has more than one GPC, the grid used by the compute kernel which reads MP performance counters seems to be too small. The consequence is that the kernel is not launched on all TPCs. Increasing the grid size using the number of GPCs now launches enough blocks and we can read MP performance counters of all TPCs. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 840f200..c22ad4b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -528,7 +528,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; - const uint grid[3] = { screen->mp_count, 1, 1 }; + const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 }; unsigned c; if (unlikely(!screen->pm.prog)) { -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/16] nvc0: fix monitoring multiple MP counters queries on Fermi
For strange reasons, the signal id depends on the slot selected on Fermi but not on Kepler. Fortunately, the signal ids are just offset by the slot id! Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 147 +++-- 1 file changed, 79 insertions(+), 68 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 99e9073..6ee9fa6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -128,7 +128,7 @@ struct nvc0_hw_sm_counter_cfg { uint32_t func: 16; /* mask or 4-bit logic op (depending on mode) */ uint32_t mode: 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */ - uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */ + uint32_t src_mask; /* mask for signal selection (only for NVC0:NVE4) */ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */ uint32_t sig_sel : 8; /* signal group */ uint32_t src_sel; /* signal selection for up to 4 sources */ @@ -280,78 +280,78 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = 0x80001de7ULL }; -#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s } +#define _C(f, o, g, m, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, m, 0, g, s } #define _Q(n, c, ...) 
[NVC0_HW_SM_QUERY_##n] = { \ { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\ } static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = { - _Q(ACTIVE_CYCLES, 1, _C(0x, LOGOP, 0x11, 0x)), - _Q(ACTIVE_WARPS,6, _C(0x, LOGOP, 0x24, 0x0010), - _C(0x, LOGOP, 0x24, 0x0021), - _C(0x, LOGOP, 0x24, 0x0032), - _C(0x, LOGOP, 0x24, 0x0043), - _C(0x, LOGOP, 0x24, 0x0054), - _C(0x, LOGOP, 0x24, 0x0065)), - _Q(ATOM_COUNT, 1, _C(0x, LOGOP, 0x63, 0x0030)), - _Q(BRANCH, 2, _C(0x, LOGOP, 0x1a, 0x), - _C(0x, LOGOP, 0x1a, 0x0011)), - _Q(DIVERGENT_BRANCH,2, _C(0x, LOGOP, 0x19, 0x0020), - _C(0x, LOGOP, 0x19, 0x0031)), - _Q(GLD_REQUEST, 1, _C(0x, LOGOP, 0x64, 0x0030)), - _Q(GRED_COUNT, 1, _C(0x, LOGOP, 0x63, 0x0040)), - _Q(GST_REQUEST, 1, _C(0x, LOGOP, 0x64, 0x0060)), - _Q(INST_EXECUTED, 3, _C(0x, LOGOP, 0x2d, 0x), - _C(0x, LOGOP, 0x2d, 0x0011), - _C(0x, LOGOP, 0x2d, 0x0022)), - _Q(INST_ISSUED1_0, 1, _C(0x, LOGOP, 0x7e, 0x0010)), - _Q(INST_ISSUED1_1, 1, _C(0x, LOGOP, 0x7e, 0x0040)), - _Q(INST_ISSUED2_0, 1, _C(0x, LOGOP, 0x7e, 0x0020)), - _Q(INST_ISSUED2_1, 1, _C(0x, LOGOP, 0x7e, 0x0050)), - _Q(LOCAL_LD,1, _C(0x, LOGOP, 0x64, 0x0020)), - _Q(LOCAL_ST,1, _C(0x, LOGOP, 0x64, 0x0050)), - _Q(PROF_TRIGGER_0, 1, _C(0x, LOGOP, 0x01, 0x)), - _Q(PROF_TRIGGER_1, 1, _C(0x, LOGOP, 0x01, 0x0010)), - _Q(PROF_TRIGGER_2, 1, _C(0x, LOGOP, 0x01, 0x0020)), - _Q(PROF_TRIGGER_3, 1, _C(0x, LOGOP, 0x01, 0x0030)), - _Q(PROF_TRIGGER_4, 1, _C(0x, LOGOP, 0x01, 0x0040)), - _Q(PROF_TRIGGER_5, 1, _C(0x, LOGOP, 0x01, 0x0050)), - _Q(PROF_TRIGGER_6, 1, _C(0x, LOGOP, 0x01, 0x0060)), - _Q(PROF_TRIGGER_7, 1, _C(0x, LOGOP, 0x01, 0x0070)), - _Q(SHARED_LD, 1, _C(0x, LOGOP, 0x64, 0x0010)), - _Q(SHARED_ST, 1, _C(0x, LOGOP, 0x64, 0x0040)), - _Q(THREADS_LAUNCHED,6, _C(0x, LOGOP, 0x26, 0x0010), - _C(0x, LOGOP, 0x26, 0x0021), - _C(0x, LOGOP, 0x26, 0x0032), - _C(0x, LOGOP, 0x26, 0x0043), - _C(0x, LOGOP, 0x26, 0x0054), - _C(0x, LOGOP, 0x26, 0x0065)), - _Q(TH_INST_EXECUTED_0, 6, _C(0x, LOGOP, 0xa3, 0x), - _C(0x, LOGOP, 0xa3, 
0x0011), - _C(0x, LOGOP, 0xa3, 0x0022), - _C(0x, LOGOP, 0xa3, 0x0033), - _C(0x, LOGOP, 0xa3, 0x0044), - _C(0x, LOGOP, 0xa3, 0x0055)), - _Q(TH_INST_EXECUTED_1, 6, _C(0x, LOGOP, 0xa5, 0x), - _C(0x, LOGOP, 0xa5, 0x0011), - _C(0x, LOGOP, 0xa5, 0x0022
[Mesa-dev] [PATCH 13/16] nvc0: enable compute support by default on Fermi
Compute support was not enabled by default because weird effects on 3D state happened, but I can't reproduce them anymore. This also enables MP performance counters by default on Fermi. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 3 +-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index c81b85a..80f311b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -291,7 +291,6 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, count += NVE4_HW_SM_QUERY_COUNT; } else if (screen->base.class_3d < NVE4_3D_CLASS) { -/* NVC0_COMPUTE is not always enabled */ count += NVC0_HW_SM_QUERY_COUNT; } } @@ -358,7 +357,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; } else if (screen->base.class_3d < NVE4_3D_CLASS) { -count++; /* NVC0_COMPUTE is not always enabled */ +count++; } } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ba53d10..d6a4ac5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -561,12 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) switch (screen->base.device->chipset & ~0xf) { case 0xc0: case 0xd0: - /* Using COMPUTE has weird effects on 3D state, we need to - * investigate this further before enabling it by default. - */ - if (debug_get_bool_option("NVC0_COMPUTE", false)) - return nvc0_screen_compute_setup(screen, screen->base.pushbuf); - return 0; + return nvc0_screen_compute_setup(screen, screen->base.pushbuf); case 0xe0: return nve4_screen_compute_setup(screen, screen->base.pushbuf); case 0xf0: -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 09/16] nvc0: fix unaligned mem access when reading MP counters on Fermi
Memory accesses have to be aligned to 128 bits. Note that this doesn't happen when the card only has one TPC. This patch fixes the following dmesg fail: gr: GPC0/TPC1/MP trap: global 0004 [MULTIPLE_WARP_ERRORS] warp 000f [UNALIGNED_MEM_ACCESS] Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 6ee9fa6..840f200 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -247,7 +247,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = * mov b32 $r11 c0[0x4] * ext u32 $r8 $r9 0x414 * (not $p0) exit -* mul $r8 u32 $r8 u32 36 +* mul $r8 u32 $r8 u32 48 * add b32 $r10 $c $r10 $r8 * add b32 $r11 $r11 0x0 $c * mov b32 $r8 c0[0x8] @@ -270,7 +270,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = 0x280040001002dde4ULL, 0x7000c01050921c03ULL, 0x800021e7ULL, - 0x100090821c02ULL, + 0x1000c0821c02ULL, 0x480120a29c03ULL, 0x08b2dc42ULL, 0x2800400020021de4ULL, @@ -473,7 +473,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) /* set sequence field to 0 (used to check if result is available) */ for (i = 0; i < screen->mp_count; ++i) { - const unsigned b = (0x24 / 4) * i; + const unsigned b = (0x30 / 4) * i; hq->data[b + 8] = 0; } hq->sequence++; @@ -617,7 +617,7 @@ nvc0_hw_sm_query_read_data(uint32_t count[32][8], unsigned p, c; for (p = 0; p < mp_count; ++p) { - const unsigned b = (0x24 / 4) * p; + const unsigned b = (0x30 / 4) * p; for (c = 0; c < cfg->num_counters; ++c) { if (hq->data[b + 8] != hq->sequence) { @@ -815,7 +815,10 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type) */ space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t); } else { - /* for each MP: + /* + * Note that padding is used to align memory access to 128 bits. 
+ * + * for each MP: * [00] = MP.C0 * [04] = MP.C1 * [08] = MP.C2 @@ -825,8 +828,11 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type) * [18] = MP.C6 * [1c] = MP.C7 * [20] = MP.sequence + * [24] = padding + * [28] = padding + * [2c] = padding */ - space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t); + space = (8 + 1 + 3) * nvc0->screen->mp_count * sizeof(uint32_t); } if (!nvc0_hw_query_allocate(nvc0, &hq->base, space)) { -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/16] nvc0: store the number of GPCs to nvc0_screen
NOUVEAU_GETPARAM_GRAPH_UNITS param returns the number of GPCs, the total number of TPCs and the number of ROP units. Note that when the DRM version is too old the default number of GPCs is fixed to 4. This will be used to launch the compute kernel which is used to read MP performance counters over all GPCs. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index afd91e6..ba53d10 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -914,6 +914,7 @@ nvc0_screen_create(struct nouveau_device *dev) else value = (16 << 8) | 4; } + screen->gpc_count = value & 0x; screen->mp_count = value >> 8; screen->mp_count_compute = screen->mp_count; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 8cf7560..857eb03 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -67,6 +67,7 @@ struct nvc0_screen { struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *poly_cache; + uint8_t gpc_count; uint16_t mp_count; uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */ -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 14/16] nvc0: move SW/HW queries info to their respective files
This will help for handling HW SM queries variants on Fermi. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 185 + src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 14 ++ src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.h | 3 + .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 133 +++ .../drivers/nouveau/nvc0/nvc0_query_hw_sm.h| 4 +- src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.c | 64 +++ src/gallium/drivers/nouveau/nvc0/nvc0_query_sw.h | 3 + 7 files changed, 228 insertions(+), 178 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index 80f311b..e4752e2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -141,163 +141,19 @@ nvc0_render_condition(struct pipe_context *pipe, PUSH_DATA (push, hq->bo->offset + hq->offset); } -/* === DRIVER STATISTICS === */ - -#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS - -static const char *nvc0_sw_query_drv_stat_names[] = -{ - "drv-tex_obj_current_count", - "drv-tex_obj_current_bytes", - "drv-buf_obj_current_count", - "drv-buf_obj_current_bytes_vid", - "drv-buf_obj_current_bytes_sys", - "drv-tex_transfers_rd", - "drv-tex_transfers_wr", - "drv-tex_copy_count", - "drv-tex_blit_count", - "drv-tex_cache_flush_count", - "drv-buf_transfers_rd", - "drv-buf_transfers_wr", - "drv-buf_read_bytes_staging_vid", - "drv-buf_write_bytes_direct", - "drv-buf_write_bytes_staging_vid", - "drv-buf_write_bytes_staging_sys", - "drv-buf_copy_bytes", - "drv-buf_non_kernel_fence_sync_count", - "drv-any_non_kernel_fence_sync_count", - "drv-query_sync_count", - "drv-gpu_serialize_count", - "drv-draw_calls_array", - "drv-draw_calls_indexed", - "drv-draw_calls_fallback_count", - "drv-user_buffer_upload_bytes", - "drv-constbuf_upload_count", - "drv-constbuf_upload_bytes", - "drv-pushbuf_count", - "drv-resource_validate_count" -}; - -#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */ - -/* === PERFORMANCE 
MONITORING COUNTERS for NVE4+ === */ - -/* NOTE: intentionally using the same names as NV */ -static const char *nve4_hw_sm_query_names[] = -{ - /* MP counters */ - "active_cycles", - "active_warps", - "atom_count", - "branch", - "divergent_branch", - "gld_request", - "global_ld_mem_divergence_replays", - "global_store_transaction", - "global_st_mem_divergence_replays", - "gred_count", - "gst_request", - "inst_executed", - "inst_issued", - "inst_issued1", - "inst_issued2", - "l1_global_load_hit", - "l1_global_load_miss", - "l1_local_load_hit", - "l1_local_load_miss", - "l1_local_store_hit", - "l1_local_store_miss", - "l1_shared_load_transactions", - "l1_shared_store_transactions", - "local_load", - "local_load_transactions", - "local_store", - "local_store_transactions", - "prof_trigger_00", - "prof_trigger_01", - "prof_trigger_02", - "prof_trigger_03", - "prof_trigger_04", - "prof_trigger_05", - "prof_trigger_06", - "prof_trigger_07", - "shared_load", - "shared_load_replay", - "shared_store", - "shared_store_replay", - "sm_cta_launched", - "threads_launched", - "uncached_global_load_transaction", - "warps_launched", - /* metrics, i.e. functions of the MP counters */ - "metric-ipc", /* inst_executed, clock */ - "metric-ipac", /* inst_executed, active_cycles */ - "metric-ipec", /* inst_executed, (bool)inst_executed */ - "metric-achieved_occupancy",/* active_warps, active_cycles */ - "metric-sm_efficiency", /* active_cycles, clock */ - "metric-inst_replay_overhead" /* inst_issued, inst_executed */ -}; - -/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ -static const char *nvc0_hw_sm_query_names[] = -{ - /* MP counters */ - "active_cycles", - "active_warps", - "atom_count", - "branch", - "divergent_branch", - "gld_request", - "gred_count", - "gst_request", - &
Re: [Mesa-dev] [PATCH 10/16] nvc0: store the number of GPCs to nvc0_screen
On 10/16/2015 07:24 PM, Ilia Mirkin wrote: On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset wrote: NOUVEAU_GETPARAM_GRAPH_UNITS param returns the number of GPCs, the total number of TPCs and the number of ROP units. Note that when the DRM version is too old the default number of GPCs is fixed to 4. This will be used to launch the compute kernel which is used to read MP performance counters over all GPCs. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index afd91e6..ba53d10 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -914,6 +914,7 @@ nvc0_screen_create(struct nouveau_device *dev) else value = (16 << 8) | 4; } + screen->gpc_count = value & 0x; gpc_count is a u8... I guess you wanted to make this &0xff? Doesn't *really* matter in practice, but a bit confusing. Yes, good catch. screen->mp_count = value >> 8; screen->mp_count_compute = screen->mp_count; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 8cf7560..857eb03 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -67,6 +67,7 @@ struct nvc0_screen { struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *poly_cache; + uint8_t gpc_count; uint16_t mp_count; uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */ -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/16] nvc0: fix queries which use multiple MP counters on Fermi
On 10/16/2015 07:32 PM, Ilia Mirkin wrote: Other than the missing * (1 << c), what was wrong with the old logic? MP counters were always configured starting from slot 0 to cfg->num_src. So, if you monitored two hardware events at the same time, the first one was overwritten by the second one. Now, I check if the slot is free before pushing the configuration through the pushbuf. On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset wrote: Queries which use more than one MP counters was misconfigured and computing the final result was also wrong because sources need to be configured on different hardware counters instead. According to the blob, computing the result is now as follows: FOR i..n val += ctr[i] * pow(2, i) Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 128 + 1 file changed, 81 insertions(+), 47 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 2060662..99e9073 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -131,7 +131,7 @@ struct nvc0_hw_sm_counter_cfg uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */ uint32_t sig_sel : 8; /* signal group */ - uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */ + uint32_t src_sel; /* signal selection for up to 4 sources */ }; #define NVC0_COUNTER_OPn_SUM0 @@ -280,44 +280,82 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] = 0x80001de7ULL }; -#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_HW_SM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } } +#define _C(f, o, g, s) { f, NVC0_COMPUTE_MP_PM_OP_MODE_##o, 0, 0, g, s } +#define _Q(n, c, ...) 
[NVC0_HW_SM_QUERY_##n] = { \ + { __VA_ARGS__ }, c, NVC0_COUNTER_OPn_SUM, { 1, 1 },\ +} static const struct nvc0_hw_sm_query_cfg nvc0_hw_sm_queries[] = { - _Q(ACTIVE_CYCLES, 0x, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(ACTIVE_WARPS,0x, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), - _Q(ATOM_COUNT, 0x, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(BRANCH, 0x, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00), - _Q(DIVERGENT_BRANCH,0x, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00), - _Q(GLD_REQUEST, 0x, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(GRED_COUNT, 0x, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(GST_REQUEST, 0x, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_EXECUTED, 0x, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00), - _Q(INST_ISSUED1_0, 0x, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED1_1, 0x, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED2_0, 0x, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(INST_ISSUED2_1, 0x, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LOCAL_LD,0x, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(LOCAL_ST,0x, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_0, 0x, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_1, 0x, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_2, 0x, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_3, 0x, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_4, 0x, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_5, 0x, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_6, 0x, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(PROF_TRIGGER_7, 0x, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(SHARED_LD, 0x, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(SHARED_ST, 0x, 
LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00), - _Q(THREADS_LAUNCHED,0x, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65), - _Q(TH_INST_EXECUTED_0, 0x, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_1, 0x, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_2, 0x, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55), - _Q(TH_INST_EXECUTED_3, 0x, LOGOP,
Re: [Mesa-dev] [PATCH 16/16] nvc0: add a note about MP counters on GF100/GF110
On 10/16/2015 07:50 PM, Ilia Mirkin wrote: Series is Reviewed-by: Ilia Mirkin I had a couple of very minor comments that you can feel free to accept or ignore. Thank you for this review Ilia, and I think I'll accept all of your changes. :) On Fri, Oct 16, 2015 at 1:22 PM, Samuel Pitoiset wrote: MP counters on GF100/GF110 (compute capability 2.0) are buggy because there is a context-switch problem that we need to fix. Results might be wrong sometimes, be careful! Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index c4b40a6..c5ce3e3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -286,6 +286,11 @@ static const struct nvc0_hw_sm_query_cfg nve4_hw_sm_queries[] = #undef _M2B /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */ +/* NOTES: + * - MP counters on GF100/GF110 (compute capability 2.0) are buggy + * because there is a context-switch problem that we need to fix. + * Results might be wrong sometimes, be careful! + */ static const char *nvc0_hw_sm_query_names[] = { /* MP counters */ -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi
As explained in the CUDA toolkit documentation, "a metric is a characteristic of an application that is calculated from one or more event values." Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +- .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444 + .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h| 42 ++ 4 files changed, 504 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index edc6cf4..c18e9f5 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \ nvc0/nvc0_query.h \ nvc0/nvc0_query_hw.c \ nvc0/nvc0_query_hw.h \ + nvc0/nvc0_query_hw_metric.c \ + nvc0/nvc0_query_hw_metric.h \ nvc0/nvc0_query_hw_sm.c \ nvc0/nvc0_query_hw_sm.h \ nvc0/nvc0_query_sw.c \ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 91254be..90ee82f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -25,6 +25,7 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" #define NVC0_HW_QUERY_STATE_READY 0 @@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index) return (struct nvc0_query *)hq; } + hq = nvc0_hw_metric_create_query(nvc0, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nvc0_query *)hq; + } + hq = CALLOC_STRUCT(nvc0_hw_query); if (!hq) return NULL; @@ -435,14 +442,20 @@ int nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id, struct pipe_driver_query_info *info) { - int 
num_hw_sm_queries = 0; + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL); if (!info) - return num_hw_sm_queries; + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nvc0_hw_sm_get_driver_query_info(screen, id, info); - return nvc0_hw_sm_get_driver_query_info(screen, id, info); + return nvc0_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); } void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c new file mode 100644 index 000..dbe350a --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -0,0 +1,444 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_query_hw_metric.h" +#include "nvc0/nvc0_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ +static const char *nvc0_hw_metric_names[] = +{ + "metric-achieved_occupancy", + "metric-branch_efficiency", + "metric-inst_issued", + "metric-inst_per_wrap", + "metric-inst_replay_overhead", + "metric-issued_ipc", + "metric-issue_slots", + "metric-issue_slot_utilization", + "metric-ipc", +}; + +struct nvc0_hw_metric_query_cfg { + uint
Re: [Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi
On 10/16/2015 11:22 PM, Ilia Mirkin wrote: On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset wrote: As explained in the CUDA toolkit documentation, "a metric is a characteristic of an application that is calculated from one or more event values." Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +- .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444 + .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h| 42 ++ 4 files changed, 504 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index edc6cf4..c18e9f5 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \ nvc0/nvc0_query.h \ nvc0/nvc0_query_hw.c \ nvc0/nvc0_query_hw.h \ + nvc0/nvc0_query_hw_metric.c \ + nvc0/nvc0_query_hw_metric.h \ nvc0/nvc0_query_hw_sm.c \ nvc0/nvc0_query_hw_sm.h \ nvc0/nvc0_query_sw.c \ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 91254be..90ee82f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -25,6 +25,7 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" #define NVC0_HW_QUERY_STATE_READY 0 @@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index) return (struct nvc0_query *)hq; } + hq = nvc0_hw_metric_create_query(nvc0, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nvc0_query *)hq; + } + hq = CALLOC_STRUCT(nvc0_hw_query); if (!hq) return NULL; @@ -435,14 +442,20 @@ int 
nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id, struct pipe_driver_query_info *info) { - int num_hw_sm_queries = 0; + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL); if (!info) - return num_hw_sm_queries; + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nvc0_hw_sm_get_driver_query_info(screen, id, info); - return nvc0_hw_sm_get_driver_query_info(screen, id, info); + return nvc0_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); } void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c new file mode 100644 index 000..dbe350a --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -0,0 +1,444 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_query_hw_metric.h" +#include "nvc0/nvc0_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ +static const char *nvc0_hw_metric_names[] = +{ + "metric-achieved_occupancy", + "metric-branch_efficiency", + "metric-inst_issued", + "metric-inst_per_wrap", + "metric-inst_replay_overhead", + "metric-issued_ipc", + "metric-issue_
Re: [Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi
On 10/16/2015 11:57 PM, Ilia Mirkin wrote: On Fri, Oct 16, 2015 at 5:35 PM, Samuel Pitoiset wrote: On 10/16/2015 11:22 PM, Ilia Mirkin wrote: On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset wrote: As explained in the CUDA toolkit documentation, "a metric is a characteristic of an application that is calculated from one or more event values." Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +- .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 444 + .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h| 42 ++ 4 files changed, 504 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index edc6cf4..c18e9f5 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \ nvc0/nvc0_query.h \ nvc0/nvc0_query_hw.c \ nvc0/nvc0_query_hw.h \ + nvc0/nvc0_query_hw_metric.c \ + nvc0/nvc0_query_hw_metric.h \ nvc0/nvc0_query_hw_sm.c \ nvc0/nvc0_query_hw_sm.h \ nvc0/nvc0_query_sw.c \ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c index 91254be..90ee82f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c @@ -25,6 +25,7 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" #define NVC0_HW_QUERY_STATE_READY 0 @@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index) return (struct nvc0_query *)hq; } + hq = nvc0_hw_metric_create_query(nvc0, type); + if (hq) { + hq->base.funcs = &hw_query_funcs; + return (struct nvc0_query *)hq; + } + hq 
= CALLOC_STRUCT(nvc0_hw_query); if (!hq) return NULL; @@ -435,14 +442,20 @@ int nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id, struct pipe_driver_query_info *info) { - int num_hw_sm_queries = 0; + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL); + num_hw_metric_queries = + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL); if (!info) - return num_hw_sm_queries; + return num_hw_sm_queries + num_hw_metric_queries; + + if (id < num_hw_sm_queries) + return nvc0_hw_sm_get_driver_query_info(screen, id, info); - return nvc0_hw_sm_get_driver_query_info(screen, id, info); + return nvc0_hw_metric_get_driver_query_info(screen, + id - num_hw_sm_queries, info); } void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c new file mode 100644 index 000..dbe350a --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -0,0 +1,444 @@ +/* + * Copyright 2015 Samuel Pitoiset + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0/nvc0_context.h" +#include "nvc0/nvc0_query_hw_metric.h" +#include "nvc0/nvc0_query_hw_sm.h" + +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ +static const char *nvc0_hw_metric_names[] = +{ + "metric-achieved_occupancy", + "metric-branch_efficiency", + "metric-inst_issued", + "metric-inst_
[Mesa-dev] [PATCH] nvc0: do not bind input params at compute state init on Fermi
It looks like binding a constant buffer on compute overwrites the 3D state. To avoid that, we already re-bind all the 3D constant buffers after launching a compute grid but this is not enough. Binding the constant buffer of input parameters for the compute state at initialization corrupts the 3D constant buffers, and it's just useless to bind it because this is not needed until we really launch a grid. This fixes some piglit regressions related to interpolation tests introduced in "nvc0: enable compute support by default on Fermi". Fixes: 00d6186 (nvc0: enable compute support by default on Fermi) Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 8 1 file changed, 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 96d753c..e33af04 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -105,14 +105,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - /* bind parameters buffer */ - BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); - PUSH_DATA (push, screen->parm->size); - PUSH_DATAh(push, screen->parm->offset); - PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); - PUSH_DATA (push, (0 << 8) | 1); - /* TODO: textures & samplers */ return 0; -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: do not bind input params at compute state init on Fermi
On 10/17/2015 08:08 PM, Ilia Mirkin wrote: This seems surprising... could I convince you to trace a test that executes both a graphics and compute pipeline, which both use (different) uniforms? I'm convinced because this seems to be a bit weird for me too. Anyways, this patch is fine for now, this is Reviewed-by: Ilia Mirkin Thanks. On Sat, Oct 17, 2015 at 12:19 PM, Samuel Pitoiset wrote: It looks like binding a constant buffer on compute overwrites the 3D state. To avoid that, we already re-bind all the 3D constant buffers after launching a compute grid but this is not enough. Binding the constant buffer of input parameters for the compute state at initialization corrupts the 3D constant buffers, and it's just useless to bind it because this is not needed until we really launch a grid. This fixes some piglit regressions related to interpolation tests introduced in "nvc0: enable compute support by default on Fermi". Fixes: 00d6186 (nvc0: enable compute support by default on Fermi) Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 8 1 file changed, 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 96d753c..e33af04 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -105,14 +105,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - /* bind parameters buffer */ - BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); - PUSH_DATA (push, screen->parm->size); - PUSH_DATAh(push, screen->parm->offset); - PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); - PUSH_DATA (push, (0 << 8) | 1); - /* TODO: textures & samplers */ return 0; -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list 
mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 13/16] nvc0: enable compute support by default on Fermi
On 10/17/2015 09:11 PM, Jan Vesely wrote: Does this mean it should be possible to hook up clover with nouveau? As I told Ilia, this is just the ability to launch compute kernels on Fermi. Unfortunately, OpenCL is still not supported by Nouveau but I hope this is going to change. Jan On Fri, 2015-10-16 at 19:22 +0200, Samuel Pitoiset wrote: Compute support was not enabled by default because weird effects on 3D state happened, but I can't reproduce them anymore. This also enables MP performance counters by default on Fermi. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 3 +-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 7 +-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index c81b85a..80f311b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -291,7 +291,6 @@ nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen, count += NVE4_HW_SM_QUERY_COUNT; } else if (screen->base.class_3d < NVE4_3D_CLASS) { -/* NVC0_COMPUTE is not always enabled */ count += NVC0_HW_SM_QUERY_COUNT; } } @@ -358,7 +357,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; } else if (screen->base.class_3d < NVE4_3D_CLASS) { -count++; /* NVC0_COMPUTE is not always enabled */ +count++; } } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ba53d10..d6a4ac5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -561,12 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) switch (screen->base.device->chipset & ~0xf) { case 0xc0: case 0xd0: - /* Using COMPUTE has weird effects on 3D state, we need to - * investigate this further before enabling it by default. 
- */ - if (debug_get_bool_option("NVC0_COMPUTE", false)) - return nvc0_screen_compute_setup(screen, screen ->base.pushbuf); - return 0; + return nvc0_screen_compute_setup(screen, screen ->base.pushbuf); case 0xe0: return nve4_screen_compute_setup(screen, screen ->base.pushbuf); case 0xf0: ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/4] nv50: move nva0_so_target_save_offset() to its correct location
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query.c | 18 -- src/gallium/drivers/nouveau/nv50/nv50_query.h | 3 --- src/gallium/drivers/nouveau/nv50/nv50_state.c | 18 ++ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 7718d69..1b4abdb 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -444,24 +444,6 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, } void -nva0_so_target_save_offset(struct pipe_context *pipe, - struct pipe_stream_output_target *ptarg, - unsigned index, bool serialize) -{ - struct nv50_so_target *targ = nv50_so_target(ptarg); - - if (serialize) { - struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; - PUSH_SPACE(push, 2); - BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); - PUSH_DATA (push, 0); - } - - nv50_query(targ->pq)->index = index; - nv50_query_end(pipe, targ->pq); -} - -void nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h index 722af0c..a703013 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -33,8 +33,5 @@ void nv50_init_query_functions(struct nv50_context *); void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, struct nv50_query *, unsigned result_offset); void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); -void nva0_so_target_save_offset(struct pipe_context *, -struct pipe_stream_output_target *, -unsigned, bool); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 410e631..8af2add 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -1057,6 +1057,24 @@ nv50_so_target_create(struct pipe_context *pipe, } static void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, bool serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + pipe->end_query(pipe, targ->pq); +} + +static void nv50_so_target_destroy(struct pipe_context *pipe, struct pipe_stream_output_target *ptarg) { -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/4] nv50: move HW queries to nv50_query_hw.c/h
Hi there, As for nvc0, this series moves HW queries to improve readability of this area of the driver and to prepare the way for both MP counters and global perf counters. There are no regressions with piglit. Feel free to review, Thanks. Samuel Pitoiset (4): nv50: add a header file for nv50_query nv50: move nva0_so_target_save_offset() to its correct location nv50: move HW queries to nv50_query_hw.c/h files nv50: do not create an invalid HW query type src/gallium/drivers/nouveau/Makefile.sources | 3 + src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +- src/gallium/drivers/nouveau/nv50/nv50_query.c | 397 ++-- src/gallium/drivers/nouveau/nv50/nv50_query.h | 33 ++ src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 410 + src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 50 +++ .../drivers/nouveau/nv50/nv50_shader_state.c | 7 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 21 +- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 4 +- 9 files changed, 551 insertions(+), 386 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/4] nv50: add a header file for nv50_query
Like for nvc0, this will allow to split different types of queries and to prepare the way for both global performance counters and MP counters. While we are at it, make use of nv50_query struct instead of pipe_query. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 1 + src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +-- src/gallium/drivers/nouveau/nv50/nv50_query.c | 29 ++-- src/gallium/drivers/nouveau/nv50/nv50_query.h | 40 ++ .../drivers/nouveau/nv50/nv50_shader_state.c | 4 +-- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 3 +- 6 files changed, 49 insertions(+), 40 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index c18e9f5..06d9d97 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -73,6 +73,7 @@ NV50_C_SOURCES := \ nv50/nv50_program.h \ nv50/nv50_push.c \ nv50/nv50_query.c \ + nv50/nv50_query.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 69c1212..fb74a97 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -16,6 +16,7 @@ #include "nv50/nv50_program.h" #include "nv50/nv50_resource.h" #include "nv50/nv50_transfer.h" +#include "nv50/nv50_query.h" #include "nouveau_context.h" #include "nouveau_debug.h" @@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *); /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_query.c */ -void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method, - struct pipe_query *, unsigned result_offset); -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query 
*); -void nva0_so_target_save_offset(struct pipe_context *, -struct pipe_stream_output_target *, -unsigned index, bool seralize); - -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) - /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 5368ee7..7718d69 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -25,6 +25,7 @@ #define NV50_PUSH_EXPLICIT_SPACE_CHECKING #include "nv50/nv50_context.h" +#include "nv50/nv50_query.h" #include "nv_object.xml.h" #define NV50_QUERY_STATE_READY 0 @@ -39,29 +40,8 @@ * queries anyway. */ -struct nv50_query { - uint32_t *data; - uint16_t type; - uint16_t index; - uint32_t sequence; - struct nouveau_bo *bo; - uint32_t base; - uint32_t offset; /* base + i * 32 */ - uint8_t state; - bool is64bit; - int nesting; /* only used for occlusion queries */ - struct nouveau_mm_allocation *mm; - struct nouveau_fence *fence; -}; - #define NV50_QUERY_ALLOC_SPACE 256 -static inline struct nv50_query * -nv50_query(struct pipe_query *pipe) -{ - return (struct nv50_query *)pipe; -} - static bool nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -363,9 +343,8 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, } void -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) { - struct nv50_query *q = nv50_query(pq); unsigned offset = q->offset; PUSH_SPACE(push, 5); @@ -453,10 +432,8 @@ nv50_render_condition(struct pipe_context *pipe, void nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, - struct pipe_query *pq, unsigned result_offset) + struct nv50_query *q, unsigned result_offset) { - struct nv50_query *q = 
nv50_query(pq); - nv50_query_update(q); if (q->state != NV50_QUERY_STATE_READY) nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h new file mode 100644 index 000..722af0c --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h @@ -0,0 +1,40 @@ +#ifndef __NV50_QUERY_H__ +#define __NV50_QUERY_H__ + +#include "pipe/p_context.h" + +#include "nouveau_context.h" +#include "n
[Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type
While we are at it, store the rotate offset for occlusion queries to nv50_hw_query like on nvc0. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +--- src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 3 +- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index fcdd183..6260410 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) * query might set the initial render condition to false even *after* we re- * initialized it to true. */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - hq->offset += 32; - hq->data += 32 / sizeof(*hq->data); + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); @@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) { struct nv50_hw_query *hq; struct nv50_query *q; + unsigned space; hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) @@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) q->funcs = &hw_query_funcs; q->type = type; - if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case 
NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + + if (!nv50_hw_query_allocate(nv50, q, space)) { FREE(hq); return NULL; } - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + if (hq->rotate) { /* we advance before query_begin ! */ - hq->offset -= 32; - hq->data -= 32 / sizeof(*hq->data); + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); } - hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - return q; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index ea2bf24..3a53e8a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -24,9 +24,10 @@ struct nv50_hw_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base_offset; - uint32_t offset; /* base + i * 32 */ + uint32_t offset; /* base + i * rotate */ uint8_t state; bool is64bit; + uint8_t rotate; int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/4] nv50: move HW queries to nv50_query_hw.c/h files
Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nv50/nv50_query.c | 354 ++- src/gallium/drivers/nouveau/nv50/nv50_query.h | 26 +- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 389 + src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 49 +++ .../drivers/nouveau/nv50/nv50_shader_state.c | 7 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 3 +- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 5 +- 8 files changed, 486 insertions(+), 349 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 06d9d97..83f8113 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -74,6 +74,8 @@ NV50_C_SOURCES := \ nv50/nv50_push.c \ nv50/nv50_query.c \ nv50/nv50_query.h \ + nv50/nv50_query_hw.c \ + nv50/nv50_query_hw.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 1b4abdb..dd9b85b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -26,334 +26,45 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" -#include "nv_object.xml.h" - -#define NV50_QUERY_STATE_READY 0 -#define NV50_QUERY_STATE_ACTIVE 1 -#define NV50_QUERY_STATE_ENDED 2 -#define NV50_QUERY_STATE_FLUSHED 3 - -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts - * (since we use only a single GPU channel per screen) will not work properly. - * - * The first is not that big of an issue because OpenGL does not allow nested - * queries anyway. 
- */ - -#define NV50_QUERY_ALLOC_SPACE 256 - -static bool -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) -{ - struct nv50_screen *screen = nv50->screen; - int ret; - - if (q->bo) { - nouveau_bo_ref(NULL, &q->bo); - if (q->mm) { - if (q->state == NV50_QUERY_STATE_READY) -nouveau_mm_free(q->mm); - else -nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, - q->mm); - } - } - if (size) { - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); - if (!q->bo) - return false; - q->offset = q->base; - - ret = nouveau_bo_map(q->bo, 0, screen->base.client); - if (ret) { - nv50_query_allocate(nv50, q, 0); - return false; - } - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); - } - return true; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); -} +#include "nv50/nv50_query_hw.h" static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q; - q = CALLOC_STRUCT(nv50_query); - if (!q) - return NULL; - - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { - FREE(q); - return NULL; - } - - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - q->type = type; - - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 32; - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! 
*/ - } - + q = nv50_hw_create_query(nv50, type, index); return (struct pipe_query *)q; } static void -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, - unsigned offset, uint32_t get) +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) { - offset += q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, get); + struct nv50_query *q = nv50_query(pq); + q->funcs->destroy_query(nv50_context(pipe), q); } static boolean -
Re: [Mesa-dev] [PATCH 1/4] nv50: add a header file for nv50_query
On 10/19/2015 10:43 AM, Pierre Moreau wrote: Hi Samuel, (some comments further down) On 11:30 PM - Oct 18 2015, Samuel Pitoiset wrote: Like for nvc0, this will allow to split different types of queries and to prepare the way for both global performance counters and MP counters. While we are at it, make use of nv50_query struct instead of pipe_query. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 1 + src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +-- src/gallium/drivers/nouveau/nv50/nv50_query.c | 29 ++-- src/gallium/drivers/nouveau/nv50/nv50_query.h | 40 ++ .../drivers/nouveau/nv50/nv50_shader_state.c | 4 +-- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 3 +- 6 files changed, 49 insertions(+), 40 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index c18e9f5..06d9d97 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -73,6 +73,7 @@ NV50_C_SOURCES := \ nv50/nv50_program.h \ nv50/nv50_push.c \ nv50/nv50_query.c \ + nv50/nv50_query.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 69c1212..fb74a97 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -16,6 +16,7 @@ #include "nv50/nv50_program.h" #include "nv50/nv50_resource.h" #include "nv50/nv50_transfer.h" +#include "nv50/nv50_query.h" #include "nouveau_context.h" #include "nouveau_debug.h" @@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *); /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_query.c */ -void nv50_init_query_functions(struct nv50_context *); -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, 
uint16_t method, - struct pipe_query *, unsigned result_offset); -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); -void nva0_so_target_save_offset(struct pipe_context *, -struct pipe_stream_output_target *, -unsigned index, bool seralize); - -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) - /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 5368ee7..7718d69 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -25,6 +25,7 @@ #define NV50_PUSH_EXPLICIT_SPACE_CHECKING #include "nv50/nv50_context.h" +#include "nv50/nv50_query.h" #include "nv_object.xml.h" #define NV50_QUERY_STATE_READY 0 @@ -39,29 +40,8 @@ * queries anyway. */ -struct nv50_query { - uint32_t *data; - uint16_t type; - uint16_t index; - uint32_t sequence; - struct nouveau_bo *bo; - uint32_t base; - uint32_t offset; /* base + i * 32 */ - uint8_t state; - bool is64bit; - int nesting; /* only used for occlusion queries */ - struct nouveau_mm_allocation *mm; - struct nouveau_fence *fence; -}; - #define NV50_QUERY_ALLOC_SPACE 256 -static inline struct nv50_query * -nv50_query(struct pipe_query *pipe) -{ - return (struct nv50_query *)pipe; -} - static bool nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) { @@ -363,9 +343,8 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, } void -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) { - struct nv50_query *q = nv50_query(pq); unsigned offset = q->offset; PUSH_SPACE(push, 5); @@ -453,10 +432,8 @@ nv50_render_condition(struct pipe_context *pipe, void nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, - 
struct pipe_query *pq, unsigned result_offset) + struct nv50_query *q, unsigned result_offset) { - struct nv50_query *q = nv50_query(pq); - nv50_query_update(q); if (q->state != NV50_QUERY_STATE_READY) nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h b/src/gallium/drivers/nouveau/nv50/nv50_query.h new file mode 100644 index 000..722af0c --- /dev/null +++ b/src/gallium/drive
Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type
On 10/19/2015 11:01 AM, Pierre Moreau wrote: Hi Samuel, (some comments below) On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote: While we are at it, store the rotate offset for occlusion queries to nv50_hw_query like on nvc0. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +--- src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 3 +- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index fcdd183..6260410 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) * query might set the initial render condition to false even *after* we re- * initialized it to true. */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - hq->offset += 32; - hq->data += 32 / sizeof(*hq->data); + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); @@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) { struct nv50_hw_query *hq; struct nv50_query *q; + unsigned space; hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) @@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) q->funcs = &hw_query_funcs; q->type = type; - if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; You should have `hq->rotate` default to 0 in other cases, as IIRC, you have no guarantee about the value of an uninitialised variable. CALLOC_STRUCT will initialize all fields to 0.
+ space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`. + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + + if (!nv50_hw_query_allocate(nv50, q, space)) { `space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to introducing this `space` variable? Do you plan to later add other possible values to it? I have a patch locally which reduces the size of that buffer for some queries, but this is not really related to this series. I'll submit it later (with other patches). Pierre FREE(hq); return NULL; } - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + if (hq->rotate) { /* we advance before query_begin ! 
*/ - hq->offset -= 32; - hq->data -= 32 / sizeof(*hq->data); + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); } - hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - return q; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index ea2bf24..3a53e8a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -24,9 +24,10 @@ struct nv50_hw_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base_offset; - uint32_t offset; /* base + i * 32 */ + uint32_t offset; /* base + i * rotate */ uint8_t state; bool is64bit; + uint8_t rotate; int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type
On 10/19/2015 12:43 PM, Pierre Moreau wrote: On 11:06 AM - Oct 19 2015, Samuel Pitoiset wrote: On 10/19/2015 11:01 AM, Pierre Moreau wrote: Hi Samuel, (some comments below) On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote: While we are at it, store the rotate offset for occlusion queries to nv50_hw_query like on nvc0. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 +--- src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 3 +- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index fcdd183..6260410 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) * query might set the initial render condition to false even *after* we re- * initialized it to true. */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - hq->offset += 32; - hq->data += 32 / sizeof(*hq->data); + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); @@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) { struct nv50_hw_query *hq; struct nv50_query *q; + unsigned space; hq = CALLOC_STRUCT(nv50_hw_query); if (!hq) @@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) q->funcs = &hw_query_funcs; q->type = type; - if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; You should have `hq->rotate` default to 0 in other cases, as IIRC, you have no guaranty about the value of an uninitialised variable. CALLOC_STRUCT will be initialize all fields to 0. Oh, that's nice! 
Didn't know about it. + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`. + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + space = NV50_HW_QUERY_ALLOC_SPACE; + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + + if (!nv50_hw_query_allocate(nv50, q, space)) { `space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to introducing this `space` variable? Do you plan to later add other possible values to it? I have a patch locally which reduces the size of that buffer for some queries, but this is not really related to this series. I'll submit it later (with other patches). One could argue then that you should introduce `space` in those later patches. space was already here, I just kept it :) Anyway, Reviewed-by: Pierre Moreau Thanks! Pierre FREE(hq); return NULL; } - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + if (hq->rotate) { /* we advance before query_begin ! 
*/ - hq->offset -= 32; - hq->data -= 32 / sizeof(*hq->data); + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); } - hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - return q; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index ea2bf24..3a53e8a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -24,9 +24,10 @@ struct nv50_hw_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base_offset; - uint32_t offset; /* base + i * 32 */ + uint32_t offset; /* base + i * rotate */ uint8_t state; bool is64bit; + uint8_t rotate; int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; -- 2.6.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___
Re: [Mesa-dev] [PATCH v2 1/7] nvc0: fix crash when nv50_miptree_from_handle fails
On 10/22/2015 01:02 AM, Julien Isorce wrote: Sorry this patch should not have gone in the v2 since it has already been reviewed by Emil. But thanks for your review. I experienced the crash when testing patch 5/7 of this patch series, around "resource = pscreen->resource_from_handle" in the new vaCreateSurface2 function. Just passing a wrong fd. I checked your remark for nv50 and nv30 and they don't make this step. From what I can see, nvc0 re-uses nv50_miptree_from_handle from nv50 but still has its own nvc0_miptree_vtbl. But that's just a guess :) ACK. Thanks for your answer. Do you need someone to push this patch? Cheers Julien On 20 October 2015 at 18:04, samuel.pitoiset <samuel.pitoi...@gmail.com> wrote: Is there a particular situation where nv50_miptree_from_handle() fails? And did you check nv50? Anyway, this patch is: Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> On 20/10/2015 18:34, Julien Isorce wrote: Signed-off-by: Julien Isorce <j.iso...@samsung.com> --- src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c index 12b5a02..15c803c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c @@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen, } else { struct pipe_resource *res = nv50_miptree_from_handle(screen, templ, whandle); - nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; + if (res) + nv04_resource(res)->vtbl = &nvc0_miptree_vtbl; return res; } } ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org> http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails
On 10/22/2015 01:16 AM, Julien Isorce wrote: The real fix is in nouveau_drm_winsys.c by setting dev to 0. Which means dev's ownership has been passed to previous call. Other changes are there to be consistent with what the screen_create functions already do on errors. This actually happens because nouveau_device_del() is (sometimes) called twice when nvXX_screen_create() fails. I don't really like this solution but I don't have a better one for now, I'll think about that in the next few days. :) Note that you forgot to call nouveau_device_del() in nvc0_screen_create(). Encountered this crash because nvc0_screen_create sometimes fails with: nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: -16 Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354 Signed-off-by: Julien Isorce --- src/gallium/drivers/nouveau/nv30/nv30_screen.c | 5 - src/gallium/drivers/nouveau/nv50/nv50_screen.c | 4 +++- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 0330164..9b8ddac 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev) unsigned oclass = 0; int ret, i; - if (!screen) + if (!screen) { + nouveau_device_del(&dev); return NULL; + } switch (dev->chipset & 0xf0) { case 0x30: @@ -456,6 +458,7 @@ nv30_screen_create(struct nouveau_device *dev) if (!oclass) { NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset); + nouveau_device_del(&dev); FREE(screen); return NULL; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ec51d00..e9604d5 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -711,8 +711,10 @@ nv50_screen_create(struct 
nouveau_device *dev) int ret; screen = CALLOC_STRUCT(nv50_screen); - if (!screen) + if (!screen) { + nouveau_device_del(&dev); return NULL; + } pscreen = &screen->base.base; ret = nouveau_screen_init(&screen->base, dev); diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c index c6603e3..bd1d761 100644 --- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c @@ -117,6 +117,8 @@ nouveau_drm_screen_create(int fd) } screen = (struct nouveau_screen*)init(dev); + /* Previous init func took ownership of dev */ + dev = 0; if (!screen) goto err; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 4/4] nv50: do not create an invalid HW query type
While we are at it, store the rotate offset for occlusion queries to nv50_hw_query like on nvc0. Changes since v2: - remove useless 'space' variable Signed-off-by: Samuel Pitoiset Reviewed-by: Pierre Moreau --- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 39 +--- src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 3 +- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c index fcdd183..945ce7ab 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c @@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) * query might set the initial render condition to false even *after* we re- * initialized it to true. */ - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - hq->offset += 32; - hq->data += 32 / sizeof(*hq->data); + if (hq->rotate) { + hq->offset += hq->rotate; + hq->data += hq->rotate / sizeof(*hq->data); if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); @@ -339,22 +339,39 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index) q->funcs = &hw_query_funcs; q->type = type; + switch (q->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + hq->rotate = 32; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_PIPELINE_STATISTICS: + hq->is64bit = true; + break; + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + break; + default: + debug_printf("invalid query type: %u\n", type); + FREE(q); + return NULL; + } + if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { FREE(hq); return NULL; } - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { + if 
(hq->rotate) { /* we advance before query_begin ! */ - hq->offset -= 32; - hq->data -= 32 / sizeof(*hq->data); + hq->offset -= hq->rotate; + hq->data -= hq->rotate / sizeof(*hq->data); } - hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - return q; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h index fe518a5..294c67d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h @@ -14,9 +14,10 @@ struct nv50_hw_query { uint32_t sequence; struct nouveau_bo *bo; uint32_t base_offset; - uint32_t offset; /* base + i * 32 */ + uint32_t offset; /* base + i * rotate */ uint8_t state; bool is64bit; + uint8_t rotate; int nesting; /* only used for occlusion queries */ struct nouveau_mm_allocation *mm; struct nouveau_fence *fence; -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 3/4] nv50: move HW queries to nv50_query_hw.c/h files
Changes since v2: - remove unused 'nv50_hw_query_funcs' struct Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/Makefile.sources | 2 + src/gallium/drivers/nouveau/nv50/nv50_query.c | 354 ++- src/gallium/drivers/nouveau/nv50/nv50_query.h | 26 +- src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 389 + src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 39 +++ .../drivers/nouveau/nv50/nv50_shader_state.c | 7 +- src/gallium/drivers/nouveau/nv50/nv50_state.c | 3 +- src/gallium/drivers/nouveau/nv50/nv50_vbo.c| 5 +- 8 files changed, 476 insertions(+), 349 deletions(-) create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 06d9d97..83f8113 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -74,6 +74,8 @@ NV50_C_SOURCES := \ nv50/nv50_push.c \ nv50/nv50_query.c \ nv50/nv50_query.h \ + nv50/nv50_query_hw.c \ + nv50/nv50_query_hw.h \ nv50/nv50_resource.c \ nv50/nv50_resource.h \ nv50/nv50_screen.c \ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c index 1b4abdb..dd9b85b 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c @@ -26,334 +26,45 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query.h" -#include "nv_object.xml.h" - -#define NV50_QUERY_STATE_READY 0 -#define NV50_QUERY_STATE_ACTIVE 1 -#define NV50_QUERY_STATE_ENDED 2 -#define NV50_QUERY_STATE_FLUSHED 3 - -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts - * (since we use only a single GPU channel per screen) will not work properly. - * - * The first is not that big of an issue because OpenGL does not allow nested - * queries anyway. 
- */ - -#define NV50_QUERY_ALLOC_SPACE 256 - -static bool -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size) -{ - struct nv50_screen *screen = nv50->screen; - int ret; - - if (q->bo) { - nouveau_bo_ref(NULL, &q->bo); - if (q->mm) { - if (q->state == NV50_QUERY_STATE_READY) -nouveau_mm_free(q->mm); - else -nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, - q->mm); - } - } - if (size) { - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base); - if (!q->bo) - return false; - q->offset = q->base; - - ret = nouveau_bo_map(q->bo, 0, screen->base.client); - if (ret) { - nv50_query_allocate(nv50, q, 0); - return false; - } - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); - } - return true; -} - -static void -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); - FREE(nv50_query(pq)); -} +#include "nv50/nv50_query_hw.h" static struct pipe_query * -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) { struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q; - q = CALLOC_STRUCT(nv50_query); - if (!q) - return NULL; - - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { - FREE(q); - return NULL; - } - - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || - type == PIPE_QUERY_PRIMITIVES_EMITTED || - type == PIPE_QUERY_SO_STATISTICS || - type == PIPE_QUERY_PIPELINE_STATISTICS); - q->type = type; - - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { - q->offset -= 32; - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! 
*/ - } - + q = nv50_hw_create_query(nv50, type, index); return (struct pipe_query *)q; } static void -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, - unsigned offset, uint32_t get) +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) { - offset += q->offset; - - PUSH_SPACE(push, 5); - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); - PUSH_DATAh(push, q->bo->offset + offset); - PUSH_DATA (push, q->bo->offset + offset); - PUSH_DATA (push, q->sequence); - PUSH_DATA (push, get); + struct nv50_query *q = nv50_query(pq); + q->fun
[Mesa-dev] [PATCH] nvc0: expose a group of performance metrics on Fermi
This allows to monitor those performance metrics through GL_AMD_performance_monitor. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 14 +- src/gallium/drivers/nouveau/nvc0/nvc0_query.h | 3 ++- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c index e4752e2..f539210 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c @@ -28,6 +28,7 @@ #include "nvc0/nvc0_query.h" #include "nvc0/nvc0_query_sw.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_query_hw_metric.h" #include "nvc0/nvc0_query_hw_sm.h" static struct pipe_query * @@ -188,7 +189,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, count++; } else if (screen->base.class_3d < NVE4_3D_CLASS) { -count++; +count += 2; } } } @@ -218,6 +219,17 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen, return 1; } } + } else + if (id == NVC0_HW_METRIC_QUERY_GROUP) { + if (screen->compute) { + if (screen->base.class_3d < NVE4_3D_CLASS) { +info->name = "Performance metrics"; +info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU; +info->max_active_queries = 1; +info->num_queries = NVC0_HW_METRIC_QUERY_COUNT; +return 1; + } + } } #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h index 6883ab6..c46361c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.h @@ -32,7 +32,8 @@ nvc0_query(struct pipe_query *pipe) * Driver queries groups: */ #define NVC0_HW_SM_QUERY_GROUP 0 -#define NVC0_SW_QUERY_DRV_STAT_GROUP 1 +#define NVC0_HW_METRIC_QUERY_GROUP 1 +#define NVC0_SW_QUERY_DRV_STAT_GROUP 2 void nvc0_init_query_functions(struct nvc0_context 
*); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c index 25aa09b..fb2806a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c @@ -431,7 +431,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id, id = nvc0_hw_metric_get_next_query_id(queries, id); info->name = nvc0_hw_metric_names[id]; info->query_type = NVC0_HW_METRIC_QUERY(id); -info->group_id = -1; +info->group_id = NVC0_HW_METRIC_QUERY_GROUP; return 1; } } -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 1/7] nvc0: fix crash when nv50_miptree_from_handle fails
On 10/26/2015 01:44 PM, Julien Isorce wrote: On 25 October 2015 at 21:38, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: Do you need someone to push this patch? Yes please Pushed. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails
On 10/27/2015 02:01 PM, samuel.pitoiset wrote: On 27/10/2015 12:52, Emil Velikov wrote: On 27 October 2015 at 10:50, samuel.pitoiset wrote: On 27/10/2015 11:37, Emil Velikov wrote: On 22 October 2015 at 00:16, Julien Isorce wrote: The real fix is in nouveau_drm_winsys.c by setting dev to 0. Which means dev's ownership has been passed to previous call. Other changes are there to be consistent with what the screen_create functions already do on errors. Encountered this crash because nvc0_screen_create sometimes fails with: nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: -16 Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354 Signed-off-by: Julien Isorce --- src/gallium/drivers/nouveau/nv30/nv30_screen.c | 5 - src/gallium/drivers/nouveau/nv50/nv50_screen.c | 4 +++- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 0330164..9b8ddac 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev) unsigned oclass = 0; int ret, i; - if (!screen) + if (!screen) { + nouveau_device_del(&dev); return NULL; + } Imho having these in screen_create() seems like the wrong 'layer'. Shouldn't one call nouveau_device_dev() from within nouveau_drm_screen_unref and explicitly call the latter if the calloc() (here and in nv50/nvc0) fails ? We can't do that because nouveau_drm_screen_unref() needs a valid nouveau_screen object and in this case it is NULL. Ouch I was under the impression that we've brought back the concept of winsys in nouveau with the hash_table patches. Seems like we haven't :( If we are to do so (split things just like the radeon/amdgpu winsys) then we can kill two birds with one stone. 
The missing device_del() on calloc failure as well as other error paths in nvxx_screen_create(). Okay, I'll have a look at how radeon/amdgpu split those things. Well, this doesn't seem to be "trivial" to do it properly actually. This is on my todolist (but not with a top priority) so, if someone else want to send a patch for this stuff, feel free to do it. :) I agree that it's not really an elegant fix but we don't really have the choice actually. In my opinion, this is not that bad. I never said it's "bad" just the wrong place for the fix. Or in other words - if we're to fix things might as well do it properly :-) Sure, I agree. :) -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] nvc0: add missing compute parameters required by clover
This fixes crashes with some piglit OpenCL tests. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ea317a5..ccaab44 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -353,7 +353,8 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + struct nvc0_screen *screen = nvc0_screen(pscreen); + const uint16_t obj_class = screen->compute->oclass; #define RET(x) do { \ if (data) \ @@ -384,6 +385,14 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, RET((uint64_t []) { 4096ul }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: RET((uint32_t []) { 32u }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ul << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0u }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count_compute }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512u }); /* FIXME: arbitrary limit */ default: return 0; } -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()
To get the size (in bytes) of a compute parameter, clover first calls get_compute_param() with a NULL data pointer. The RET() macro is based on nv50. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 -- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 6aa4f0b..ea317a5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -353,45 +353,42 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - uint64_t *data64 = (uint64_t *)data; - uint32_t *data32 = (uint32_t *)data; const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x));\ + return sizeof(x); \ +} while (0) + switch (param) { case PIPE_COMPUTE_CAP_GRID_DIMENSION: - data64[0] = 3; - return 8; + RET((uint64_t []) { 3ul }); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 
0x7fff : 65535; - data64[1] = 65535; - data64[2] = 65535; - return 24; + if (obj_class >= NVE4_COMPUTE_CLASS) { + RET(((uint64_t []) { 0x7fff, 65535ul, 65535ul })); + } else { + RET(((uint64_t []) { 65535ul, 65535ul, 65535ul })); + } case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - data64[0] = 1024; - data64[1] = 1024; - data64[2] = 64; - return 24; + RET(((uint64_t []) { 1024ul, 1024ul, 64ul })); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - data64[0] = 1024; - return 8; + RET((uint64_t []) { 1024ul }); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ - data64[0] = (uint64_t)1 << 40; - return 8; + RET((uint64_t []) { 1ul << 40 }); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ - data64[0] = 48 << 10; - return 8; + RET((uint64_t []) { 48ul << 10 }); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ - data64[0] = 512 << 10; - return 8; + RET((uint64_t []) { 512ul << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ - data64[0] = 4096; - return 8; + RET((uint64_t []) { 4096ul }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - data32[0] = 32; - return 4; + RET((uint32_t []) { 32u }); default: return 0; } + +#undef RET } static void -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()
On 11/03/2015 07:26 PM, Ilia Mirkin wrote: On Tue, Nov 3, 2015 at 1:35 PM, Samuel Pitoiset wrote: To get the size (in bytes) of a compute parameter, clover first calls get_compute_param() with a NULL data pointer. The RET() macro is based on nv50. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 -- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 6aa4f0b..ea317a5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -353,45 +353,42 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - uint64_t *data64 = (uint64_t *)data; - uint32_t *data32 = (uint32_t *)data; const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x));\ + return sizeof(x); \ +} while (0) + switch (param) { case PIPE_COMPUTE_CAP_GRID_DIMENSION: - data64[0] = 3; - return 8; + RET((uint64_t []) { 3ul }); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fff : 65535; - data64[1] = 65535; - data64[2] = 65535; - return 24; + if (obj_class >= NVE4_COMPUTE_CLASS) { + RET(((uint64_t []) { 0x7fff, 65535ul, 65535ul })); Why the ul's everywhere? And why not on the 0x7 ? Based on curro's branch for nv50 compute support, but I assume I can get rid of this. 
+ } else { + RET(((uint64_t []) { 65535ul, 65535ul, 65535ul })); + } case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - data64[0] = 1024; - data64[1] = 1024; - data64[2] = 64; - return 24; + RET(((uint64_t []) { 1024ul, 1024ul, 64ul })); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - data64[0] = 1024; - return 8; + RET((uint64_t []) { 1024ul }); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ - data64[0] = (uint64_t)1 << 40; - return 8; + RET((uint64_t []) { 1ul << 40 }); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ - data64[0] = 48 << 10; - return 8; + RET((uint64_t []) { 48ul << 10 }); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ - data64[0] = 512 << 10; - return 8; + RET((uint64_t []) { 512ul << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ - data64[0] = 4096; - return 8; + RET((uint64_t []) { 4096ul }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - data32[0] = 32; - return 4; + RET((uint32_t []) { 32u }); default: return 0; } + +#undef RET } static void -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 2/2] nvc0: add missing compute parameters required by clover
This fixes crashes with some piglit OpenCL tests. Changes since v1: - get rid of ul suffixes when they are unnecessary Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 52ce2d5..6ad3980 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -353,7 +353,8 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; + struct nvc0_screen *screen = nvc0_screen(pscreen); + const uint16_t obj_class = screen->compute->oclass; #define RET(x) do { \ if (data) \ @@ -384,6 +385,14 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, RET((uint64_t []) { 4096 }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: RET((uint32_t []) { 32 }); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + RET((uint64_t []) { 1ULL << 40 }); + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + RET((uint32_t []) { 0 }); + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + RET((uint32_t []) { screen->mp_count_compute }); + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */ default: return 0; } -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 1/2] nvc0: handle NULL pointer in nvc0_get_compute_param()
To get the size (in bytes) of a compute parameter, clover first calls get_compute_param() with a NULL data pointer. The RET() macro is based on nv50. Changes since v2: - get rid of ul suffixes when they are unnecessary Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 45 -- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 6aa4f0b..52ce2d5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -353,45 +353,42 @@ static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, enum pipe_compute_cap param, void *data) { - uint64_t *data64 = (uint64_t *)data; - uint32_t *data32 = (uint32_t *)data; const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass; +#define RET(x) do { \ + if (data) \ + memcpy(data, x, sizeof(x));\ + return sizeof(x); \ +} while (0) + switch (param) { case PIPE_COMPUTE_CAP_GRID_DIMENSION: - data64[0] = 3; - return 8; + RET((uint64_t []) { 3 }); case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 
0x7fff : 65535; - data64[1] = 65535; - data64[2] = 65535; - return 24; + if (obj_class >= NVE4_COMPUTE_CLASS) { + RET(((uint64_t []) { 0x7fff, 65535, 65535 })); + } else { + RET(((uint64_t []) { 65535, 65535, 65535 })); + } case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - data64[0] = 1024; - data64[1] = 1024; - data64[2] = 64; - return 24; + RET(((uint64_t []) { 1024, 1024, 64 })); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - data64[0] = 1024; - return 8; + RET((uint64_t []) { 1024 }); case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */ - data64[0] = (uint64_t)1 << 40; - return 8; + RET((uint64_t []) { 1ULL << 40 }); case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */ - data64[0] = 48 << 10; - return 8; + RET((uint64_t []) { 48 << 10 }); case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */ - data64[0] = 512 << 10; - return 8; + RET((uint64_t []) { 512 << 10 }); case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */ - data64[0] = 4096; - return 8; + RET((uint64_t []) { 4096 }); case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - data32[0] = 32; - return 4; + RET((uint32_t []) { 32 }); default: return 0; } + +#undef RET } static void -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/hud: document GALLIUM_HUD_PERIOD in envvars.html.
Reviewed-by: Samuel Pitoiset On 11/04/2015 06:24 AM, Jimmy Berry wrote: --- docs/envvars.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/envvars.html b/docs/envvars.html index bdfe999..173c941 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -179,6 +179,8 @@ Mesa EGL supports different sets of environment variables. See the GALLIUM_HUD - draws various information on the screen, like framerate, cpu load, driver statistics, performance counters, etc. Set GALLIUM_HUD=help and run e.g. glxgears for more info. +GALLIUM_HUD_PERIOD - sets the hud update rate in seconds (float). Use zero +to update every frame. The default period is 1/2 second. GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc. rather than stderr. GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment -- -Samuel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2] gallium/hud: control visibility at startup and runtime.
Hi Jimmy, Some comments below. On 11/04/2015 06:17 AM, Jimmy Berry wrote: - env GALLIUM_HUD_VISIBLE: control default visibility - env GALLIUM_HUD_SIGNAL_TOGGLE: toggle visibility via signal --- Thanks for the feedback. I believe all the suggested changes have been implemented. One note, all the logic except for the toggle was already in hud_create() and not hud_draw(). On the subject of allowing the user to specify the signo to use. It was suggested in the original thread that using a fixed signal might end up stealing signals from the parent application. Seems like the user should except funny behavior if they set the signal to something like SIGKILL. I am not opposed to a fixed signo or alternatively providing a default. Something like: GALLIUM_HUD_TOGGLE_SIGNAL=-1 # (results in SIGUSR1) docs/envvars.html | 6 ++ src/gallium/auxiliary/hud/hud_context.c | 29 + 2 files changed, 35 insertions(+) diff --git a/docs/envvars.html b/docs/envvars.html index bdfe999..530bbb7 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -179,6 +179,12 @@ Mesa EGL supports different sets of environment variables. See the GALLIUM_HUD - draws various information on the screen, like framerate, cpu load, driver statistics, performance counters, etc. Set GALLIUM_HUD=help and run e.g. glxgears for more info. +GALLIUM_HUD_VISIBLE - control default visibility, defaults to true. +GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal. +Especially useful to toggle hud at specific points of application and +disable for unencumbered viewing the rest of the time. For example, set +GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_SIGNAL_TOGGLE to 10 (SIGUSR1). +Use kill -10 to toggle the hud as desired. GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc. rather than stderr. 
GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index ffe30b8..bffbc2f 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -33,6 +33,7 @@ * Set GALLIUM_HUD=help for more info. */ +#include #include #include "hud/hud_context.h" @@ -51,6 +52,8 @@ #include "tgsi/tgsi_text.h" #include "tgsi/tgsi_dump.h" +/* controlls the visibility of all hud contexts */ "Control the visibility of all HUD contexts" +static boolean huds_visible = TRUE; Maybe, hud_is_hidden or something looks like a better name. struct hud_context { struct pipe_context *pipe; @@ -95,6 +98,11 @@ struct hud_context { } text, bg, whitelines; }; +static void +signal_visible_handler(int sig, siginfo_t *siginfo, void *context) +{ + huds_visible = !huds_visible; +} static void hud_draw_colored_prims(struct hud_context *hud, unsigned prim, @@ -441,6 +449,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex) struct hud_pane *pane; struct hud_graph *gr; + if (!huds_visible) + return; + hud->fb_width = tex->width0; hud->fb_height = tex->height0; hud->constants.two_div_fb_width = 2.0f / hud->fb_width; @@ -1125,6 +1136,10 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) struct pipe_sampler_view view_templ; unsigned i; const char *env = debug_get_option("GALLIUM_HUD", NULL); + long signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0); + boolean sig_handled = FALSE; + struct sigaction action; + huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE); if (!env || !*env) return NULL; @@ -1267,6 +1282,20 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso) LIST_INITHEAD(&hud->pane_list); + /* setup sig handler once for all hud contexts */ + if (!sig_handled) { + memset(&action, 0, sizeof(action)); I think you can get rid of this memset() by doing 'struct sigaction action = {};' above. 
+ action.sa_sigaction = &signal_visible_handler; + action.sa_flags = SA_SIGINFO; + + if (signo < 1 || signo >= NSIG) + fprintf(stderr, "gallium_hud: invalid signal %ld\n", signo); + else if (sigaction(signo, &action, NULL) < 0) + fprintf(stderr, "gallium_hud: unable to set handler for signal %ld\n", signo); + + sig_handled = TRUE; + } + hud_parse_env_var(hud, env); return hud; } -- -Samuel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nvc0: enable compute support on Fermi
Although compute support is not yet complete, because textures and surfaces still need to be implemented, it is already possible to launch very simple compute kernels, such as the one which reads MP performance counters. This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7d96977..5b7b39b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: - return (class_3d == NVE4_3D_CLASS) ? 1 : 0; + return 1; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; @@ -245,8 +245,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 0; break; case PIPE_SHADER_COMPUTE: - if (class_3d != NVE4_3D_CLASS) - return 0; break; default: return 0; -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: enable compute support on Fermi
On 11/06/2015 12:43 AM, Ilia Mirkin wrote: On Thu, Nov 5, 2015 at 6:41 PM, Samuel Pitoiset wrote: Altough the compute support is still not complete because textures and surfaces need to be implemented, it allows to launch very simple compute kernel like one which reads reading MP performance counters. Didn't those end up breaking 3d rendering? Have you figured out what was overwriting what? This doesn't break any stuff related to 3D rendering. The compute kernel for reading perf counters has been tested a lot on different chips. The compute support is already enabled on Kepler and it doesn't seem to break 3D rendering, btw. In the series which fixed those perf counters, I actually introduced a bug which has been fixed since: fc5ae0c13f71f049065b1422c20491d2264ae164 This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7d96977..5b7b39b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: - return (class_3d == NVE4_3D_CLASS) ? 1 : 0; + return 1; Of course this also enables it for NVF0_3D_CLASS. Pretty sure compute doesn't work there for some dumb reason (like we're missing some in our ctxsw fw...) case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 
1 : 0; @@ -245,8 +245,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 0; break; case PIPE_SHADER_COMPUTE: - if (class_3d != NVE4_3D_CLASS) - return 0; break; default: return 0; -- 2.5.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nvc0: enable compute support on Fermi
On 11/06/2015 11:23 AM, Hans de Goede wrote: Hi, On 06-11-15 00:51, Samuel Pitoiset wrote: On 11/06/2015 12:43 AM, Ilia Mirkin wrote: On Thu, Nov 5, 2015 at 6:41 PM, Samuel Pitoiset wrote: Altough the compute support is still not complete because textures and surfaces need to be implemented, it allows to launch very simple compute kernel like one which reads reading MP performance counters. Didn't those end up breaking 3d rendering? Have you figured out what was overwriting what? This doesn't break any stuff related to 3D rendering. The compute kernel for reading perf counters has been tested a lot on different chips. The compute support is already enabled on Kepler and it doesn't seem to break 3D rendering, btw. In the series which fixed those perf counters, I actually introduced a bug which has been fixed since: fc5ae0c13f71f049065b1422c20491d2264ae164 This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE. Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 7d96977..5b7b39b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: - return (class_3d == NVE4_3D_CLASS) ? 1 : 0; + return 1; Of course this also enables it for NVF0_3D_CLASS. Pretty sure compute doesn't work there for some dumb reason (like we're missing some in our ctxsw fw...) Hmm, my only compute capable card actually is a nvf0 card (gk208 based). Can you provide some quick test instructions how I can test compute on that card (with the patch from this thread applied) ? And if it does not work, any suggestions how to go about debugging this ? 
Or better, any info I can provide to help you debug this :) Unfortunately, compute is only supported on Fermi and Kepler (< GK110). I could have a look and implement it for your card but since I don't have this chipset, this is not going to be easy. Anyway, the first step is to trace what the blob does using valgrind-mmt. Basically, the vectorAdd sample in CUDA should do the job: http://nouveau.freedesktop.org/wiki/Valgrind-mmt/ Once it's done, please send me the MMT trace. But the quickest way for you to test that compute support would be to have a chip < GK110 :-) Regards, Hans -- -Samuel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support
On 11/09/2015 07:40 PM, Ilia Mirkin wrote: Signed-off-by: Ilia Mirkin --- docs/GL3.txt| 2 +- docs/relnotes/11.1.0.html | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82 + 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 7abdcd8..da0ffca 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storageDONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware) + GL_ARB_clear_texture DONE (i965, nvc0) GL_ARB_enhanced_layouts in progress (Timothy) - compile-time constant expressions in progress - explicit byte offsets for blocks in progress diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 11fbdff..33fd0b8 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers. GL_ARB_arrays_of_arrays on i965 GL_ARB_blend_func_extended on freedreno (a3xx) +GL_ARB_clear_texture on nvc0 GL_ARB_copy_image on nv50, nvc0, radeonsi GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips GL_ARB_gpu_shader5 on r600 for Evergreen and later chips diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f2e3bf0..fbeec7f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_CLEAR_TEXTURE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; @@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_CLEAR_TEXTURE: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 5f47bad..3ae9943 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe, PUSH_DATA(push, dst->u.tex.first_layer + sf->depth); PUSH_DATA(push, mt->layer_stride >> 2); PUSH_DATA(push, dst->u.tex.first_layer); + + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); } else { if (res->base.target == PIPE_BUFFER) { PUSH_DATA(push, 262144); @@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); PUSH_DATA (push, dst->u.tex.first_layer); + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { @@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } +static void +nvc0_clear_texture(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_surface sf = {{{0}}}; I'm just curious about this, does '= {}' is not enough? 
+ + assert(res->target != PIPE_BUFFER); + + sf.base.texture = res; + sf.base.format = res->format; + sf.base.u.tex.first_layer = box->z; + sf.base.u.tex.last_layer = box->depth; + sf.base.u.tex.level = level; + sf.base.width = sf.width = res->width0 << mt->ms_x; + sf.base.height = sf.height = res->height0 << mt->ms_y; + sf.depth = box->depth; + sf.offset = mt->level[level].offset; + + if (util_format_is_depth_or_stencil(res->format)) { + float depth = 0; + uint8_t stencil = 0; + unsigned clear = 0; + const struct util_format_description *desc = + util_format_description(res->format); + + if (util_format_has_depth(desc)) { + clear |= PIPE_CLEAR_DEPTH; + desc->unpack_z_float(&depth, 0, data, 0, 1, 1); + } + if (util_format_has_stencil(desc)) { + clear |= PIPE_CLEAR_STENCIL; +
Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support
On 11/09/2015 09:03 PM, Ilia Mirkin wrote: On Mon, Nov 9, 2015 at 2:58 PM, Samuel Pitoiset wrote: On 11/09/2015 07:40 PM, Ilia Mirkin wrote: Signed-off-by: Ilia Mirkin --- docs/GL3.txt| 2 +- docs/relnotes/11.1.0.html | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82 + 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 7abdcd8..da0ffca 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storageDONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware) + GL_ARB_clear_texture DONE (i965, nvc0) GL_ARB_enhanced_layouts in progress (Timothy) - compile-time constant expressions in progress - explicit byte offsets for blocks in progress diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 11fbdff..33fd0b8 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers. GL_ARB_arrays_of_arrays on i965 GL_ARB_blend_func_extended on freedreno (a3xx) +GL_ARB_clear_texture on nvc0 GL_ARB_copy_image on nv50, nvc0, radeonsi GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips GL_ARB_gpu_shader5 on r600 for Evergreen and later chips diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f2e3bf0..fbeec7f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_CLEAR_TEXTURE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; @@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_CLEAR_TEXTURE: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 5f47bad..3ae9943 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe, PUSH_DATA(push, dst->u.tex.first_layer + sf->depth); PUSH_DATA(push, mt->layer_stride >> 2); PUSH_DATA(push, dst->u.tex.first_layer); + + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); } else { if (res->base.target == PIPE_BUFFER) { PUSH_DATA(push, 262144); @@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); PUSH_DATA (push, dst->u.tex.first_layer); + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { @@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } +static void +nvc0_clear_texture(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_surface sf = {{{0}}}; I'm just curious about this, does '= {}' is not enough? I wanted to be *really* sure it got initialized... figured 3 sets was enough :) But seriously -- allegedly some compilers don't like that. I can't be bothered to check on the actual situation, so I'm including the 0 in there. And gcc wanted more {} since the first field was a struct whose first field was a struct, etc. 
Okay, I think it's *really* initialized. :) But maybe a memset() call would be better (it really doesn't matter). Anyway, I'm not familiar enough with that part of the driver to add a Rb, but it looks fine. I hope you did a full piglit run this time. ;) -ilia
Re: [Mesa-dev] [PATCH 3/3] nvc0: add ARB_clear_texture support
On 11/09/2015 09:14 PM, Ilia Mirkin wrote: On Mon, Nov 9, 2015 at 3:13 PM, Samuel Pitoiset wrote: On 11/09/2015 09:03 PM, Ilia Mirkin wrote: On Mon, Nov 9, 2015 at 2:58 PM, Samuel Pitoiset wrote: On 11/09/2015 07:40 PM, Ilia Mirkin wrote: Signed-off-by: Ilia Mirkin --- docs/GL3.txt| 2 +- docs/relnotes/11.1.0.html | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 82 + 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 7abdcd8..da0ffca 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40: GL_MAX_VERTEX_ATTRIB_STRIDE DONE (all drivers) GL_ARB_buffer_storageDONE (i965, nv50, nvc0, r600, radeonsi) - GL_ARB_clear_texture DONE (i965) (gallium - in progress, VMware) + GL_ARB_clear_texture DONE (i965, nvc0) GL_ARB_enhanced_layouts in progress (Timothy) - compile-time constant expressions in progress - explicit byte offsets for blocks in progress diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html index 11fbdff..33fd0b8 100644 --- a/docs/relnotes/11.1.0.html +++ b/docs/relnotes/11.1.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers. 
GL_ARB_arrays_of_arrays on i965 GL_ARB_blend_func_extended on freedreno (a3xx) +GL_ARB_clear_texture on nvc0 GL_ARB_copy_image on nv50, nvc0, radeonsi GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips GL_ARB_gpu_shader5 on r600 for Evergreen and later chips diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index f2e3bf0..fbeec7f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_FORCE_PERSAMPLE_INTERP: case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_CLEAR_TEXTURE: return 1; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; @@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - case PIPE_CAP_CLEAR_TEXTURE: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 5f47bad..3ae9943 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -319,6 +319,8 @@ nvc0_clear_render_target(struct pipe_context *pipe, PUSH_DATA(push, dst->u.tex.first_layer + sf->depth); PUSH_DATA(push, mt->layer_stride >> 2); PUSH_DATA(push, dst->u.tex.first_layer); + + IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); } else { if (res->base.target == PIPE_BUFFER) { PUSH_DATA(push, 262144); @@ -540,6 +542,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth)); BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1); PUSH_DATA (push, dst->u.tex.first_layer); + IMMED_NVC0(push, 
NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode); BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth); for (z = 0; z < sf->depth; ++z) { @@ -550,6 +553,84 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; } +static void +nvc0_clear_texture(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned level, + const struct pipe_box *box, + const void *data) +{ + struct nv50_miptree *mt = nv50_miptree(res); + struct nv50_surface sf = {{{0}}}; I'm just curious about this, does '= {}' is not enough? I wanted to be *really* sure it got initialized... figured 3 sets was enough :) But seriously -- allegedly some compilers don't like that. I can't be bothered to check on the actual situation, so I'm including the 0 in there. And gcc wanted more {} since the first field was a struct whose first field was a struct, etc. Okay, I think it's *really* initialized. :) But maybe, a memset() call could be better (really doesn't matter). Should be functionally equivalent, but less verbose.
Re: [Mesa-dev] [PATCH] nouveau: fix double free when screen_create fails
Hi Emil, On 11/10/2015 04:35 PM, Emil Velikov wrote: Hi Samuel, Sorry about this, I thought I already replied :-\ On 29 October 2015 at 22:22, Samuel Pitoiset wrote: On 10/27/2015 02:01 PM, samuel.pitoiset wrote: On 27/10/2015 12:52, Emil Velikov wrote: On 27 October 2015 at 10:50, samuel.pitoiset wrote: On 27/10/2015 11:37, Emil Velikov wrote: On 22 October 2015 at 00:16, Julien Isorce wrote: The real fix is in nouveau_drm_winsys.c by setting dev to 0. Which means dev's ownership has been passed to previous call. Other changes are there to be consistent with what the screen_create functions already do on errors. Encountered this crash because nvc0_screen_create sometimes fails with: nvc0_screen_create:717 - Error allocating PGRAPH context for M2MF: -16 Also see: https://bugs.freedesktop.org/show_bug.cgi?id=70354 Signed-off-by: Julien Isorce --- src/gallium/drivers/nouveau/nv30/nv30_screen.c | 5 - src/gallium/drivers/nouveau/nv50/nv50_screen.c | 4 +++- src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index 0330164..9b8ddac 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -425,8 +425,10 @@ nv30_screen_create(struct nouveau_device *dev) unsigned oclass = 0; int ret, i; - if (!screen) + if (!screen) { + nouveau_device_del(&dev); return NULL; + } Imho having these in screen_create() seems like the wrong 'layer'. Shouldn't one call nouveau_device_del() from within nouveau_drm_screen_unref and explicitly call the latter if the calloc() (here and in nv50/nvc0) fails ? We can't do that because nouveau_drm_screen_unref() needs a valid nouveau_screen object and in this case it is NULL. Ouch I was under the impression that we've brought back the concept of winsys in nouveau with the hash_table patches. 
Seems like we haven't :( If we are to do so (split things just like the radeon/amdgpu winsys) then we can kill two birds with one stone. The missing device_del() on calloc failure as well as other error paths in nvxx_screen_create(). Okay, I'll have a look at how radeon/amdgpu split those things. Well, doing this properly doesn't actually seem to be "trivial". This is on my todolist (but not with a top priority) so, if someone else wants to send a patch for this stuff, feel free to do it. :) On the contrary - it's pretty trivial: 99% of the work is either code movement or a sed job. On the other hand, it might not turn out to be stable material (rather large diff). So please add a comment or two (something resembling my suggestion) and feel free to push it. Roughly how many things do you have in your mesa todo list prior to nouveau_winsys ? A lot of things, mostly related to performance counters! ;) Fixing a segfault when something else has failed doesn't sound like a top priority to me. But... I agree this should be fixed, I'll have a look this month. Cheers, Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev