As explained in the CUDA toolkit documentation, "a metric is a
characteristic of an application that is calculated from one or more
event values."
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
src/gallium/drivers/nouveau/Makefile.sources | 2 +
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +-
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 444 +++++++++++++++++++++
.../drivers/nouveau/nvc0/nvc0_query_hw_metric.h | 42 ++
4 files changed, 504 insertions(+), 3 deletions(-)
create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index edc6cf4..c18e9f5 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
nvc0/nvc0_query.h \
nvc0/nvc0_query_hw.c \
nvc0/nvc0_query_hw.h \
+ nvc0/nvc0_query_hw_metric.c \
+ nvc0/nvc0_query_hw_metric.h \
nvc0/nvc0_query_hw_sm.c \
nvc0/nvc0_query_hw_sm.h \
nvc0/nvc0_query_sw.c \
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 91254be..90ee82f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -25,6 +25,7 @@
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_query_hw.h"
+#include "nvc0/nvc0_query_hw_metric.h"
#include "nvc0/nvc0_query_hw_sm.h"
#define NVC0_HW_QUERY_STATE_READY 0
@@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned type, unsigned index)
return (struct nvc0_query *)hq;
}
+ hq = nvc0_hw_metric_create_query(nvc0, type);
+ if (hq) {
+ hq->base.funcs = &hw_query_funcs;
+ return (struct nvc0_query *)hq;
+ }
+
hq = CALLOC_STRUCT(nvc0_hw_query);
if (!hq)
return NULL;
@@ -435,14 +442,20 @@ int
nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
struct pipe_driver_query_info *info)
{
- int num_hw_sm_queries = 0;
+ int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL);
+ num_hw_metric_queries =
+ nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);
if (!info)
- return num_hw_sm_queries;
+ return num_hw_sm_queries + num_hw_metric_queries;
+
+ if (id < num_hw_sm_queries)
+ return nvc0_hw_sm_get_driver_query_info(screen, id, info);
- return nvc0_hw_sm_get_driver_query_info(screen, id, info);
+ return nvc0_hw_metric_get_driver_query_info(screen,
+ id - num_hw_sm_queries, info);
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
new file mode 100644
index 0000000..dbe350a
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2015 Samuel Pitoiset
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_query_hw_metric.h"
+#include "nvc0/nvc0_query_hw_sm.h"
+
+/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
+/* Names under which the metrics are exposed; each carries the "metric-"
+ * prefix. The order is the same as the order of the entries in the
+ * per-chipset configuration tables of this file.
+ * NOTE(review): presumably indexed by the NVC0_HW_METRIC_QUERY_* ids from
+ * nvc0_query_hw_metric.h -- confirm the enum order matches. */
+static const char *nvc0_hw_metric_names[] = {
+   "metric-achieved_occupancy",
+   "metric-branch_efficiency",
+   "metric-inst_issued",
+   "metric-inst_per_wrap",
+   "metric-inst_replay_overhead",
+   "metric-issued_ipc",
+   "metric-issue_slots",
+   "metric-issue_slot_utilization",
+   "metric-ipc",
+};
+
+/* Lists the queries a single metric is built from. */
+struct nvc0_hw_metric_query_cfg {
+   uint32_t queries[8];    /* ids of the underlying queries (at most 8) */
+   uint32_t num_queries;   /* how many leading entries of queries[] are set */
+};
+
+/* Short-hands used while defining the configuration tables; both are
+ * #undef'd again once the tables have been defined.
+ *
+ *   _SM(n)    expands to NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_<n>)
+ *   _M(n, c)  expands to a designated initializer that stores c in slot
+ *             NVC0_HW_METRIC_QUERY_<n>; c is parenthesized so that any
+ *             expression can be passed safely.
+ *
+ * NOTE(review): identifiers starting with an underscore followed by an
+ * upper-case letter are reserved in C; the names are kept because the
+ * tables in this file use them. */
+#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_##n)
+#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = (c)
+
+/* ==== Compute capability 2.0 (GF100/GF110) ==== */
+/* Each configuration names the SM queries one metric is built from;
+ * entries of .queries past num_queries stay zero-initialized. */
+
+/* achieved_occupancy: ACTIVE_WARPS, ACTIVE_CYCLES */
+static const struct nvc0_hw_metric_query_cfg sm20_achieved_occupancy = {
+   .queries     = { _SM(ACTIVE_WARPS), _SM(ACTIVE_CYCLES) },
+   .num_queries = 2,
+};
+
+/* branch_efficiency: BRANCH, DIVERGENT_BRANCH */
+static const struct nvc0_hw_metric_query_cfg sm20_branch_efficiency = {
+   .queries     = { _SM(BRANCH), _SM(DIVERGENT_BRANCH) },
+   .num_queries = 2,
+};
+
+/* inst_per_wrap: INST_EXECUTED, WARPS_LAUNCHED */
+static const struct nvc0_hw_metric_query_cfg sm20_inst_per_wrap = {
+   .queries     = { _SM(INST_EXECUTED), _SM(WARPS_LAUNCHED) },
+   .num_queries = 2,
+};
+
+/* inst_replay_overhead: INST_ISSUED, INST_EXECUTED */
+static const struct nvc0_hw_metric_query_cfg sm20_inst_replay_overhead = {
+   .queries     = { _SM(INST_ISSUED), _SM(INST_EXECUTED) },
+   .num_queries = 2,
+};
+
+/* issued_ipc: INST_ISSUED, ACTIVE_CYCLES */
+static const struct nvc0_hw_metric_query_cfg sm20_issued_ipc = {
+   .queries     = { _SM(INST_ISSUED), _SM(ACTIVE_CYCLES) },
+   .num_queries = 2,
+};
+
+/* ipc: INST_EXECUTED, ACTIVE_CYCLES */
+static const struct nvc0_hw_metric_query_cfg sm20_ipc = {
+   .queries     = { _SM(INST_EXECUTED), _SM(ACTIVE_CYCLES) },
+   .num_queries = 2,
+};
+
+/* Metric id -> configuration for the SM 2.0 table. INST_ISSUED and
+ * ISSUE_SLOTS have no configuration here (NULL); ISSUE_SLOT_UTILIZATION
+ * shares the ISSUED_IPC configuration.
+ * NOTE(review): NULL presumably marks a metric as unavailable on these
+ * chipsets -- confirm against the query creation path. */
+static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] = {
+   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
+   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
+   _M(INST_ISSUED,            NULL),
+   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
+   _M(INST_REPLAY_OVERHEAD,   &sm20_inst_replay_overhead),
+   _M(ISSUED_IPC,             &sm20_issued_ipc),
+   _M(ISSUE_SLOTS,            NULL),
+   _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc),
+   _M(IPC,                    &sm20_ipc),
+};
+
+/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
+/* The SM 2.1 configurations use the four INST_ISSUED{1,2}_{0,1} queries
+ * where the SM 2.0 ones use the single INST_ISSUED query. */
+
+/* inst_issued: the four INST_ISSUED{1,2}_{0,1} queries */
+static const struct nvc0_hw_metric_query_cfg sm21_inst_issued = {
+   .queries = {
+      _SM(INST_ISSUED1_0),
+      _SM(INST_ISSUED1_1),
+      _SM(INST_ISSUED2_0),
+      _SM(INST_ISSUED2_1),
+   },
+   .num_queries = 4,
+};
+
+/* inst_replay_overhead: the four issue queries, then INST_EXECUTED */
+static const struct nvc0_hw_metric_query_cfg sm21_inst_replay_overhead = {
+   .queries = {
+      _SM(INST_ISSUED1_0),
+      _SM(INST_ISSUED1_1),
+      _SM(INST_ISSUED2_0),
+      _SM(INST_ISSUED2_1),
+      _SM(INST_EXECUTED),
+   },
+   .num_queries = 5,
+};
+
+/* issued_ipc: the four issue queries, then ACTIVE_CYCLES */
+static const struct nvc0_hw_metric_query_cfg sm21_issued_ipc = {
+   .queries = {
+      _SM(INST_ISSUED1_0),
+      _SM(INST_ISSUED1_1),
+      _SM(INST_ISSUED2_0),
+      _SM(INST_ISSUED2_1),
+      _SM(ACTIVE_CYCLES),
+   },
+   .num_queries = 5,
+};
+
+/* Metric id -> configuration for the SM 2.1 table. Every metric has a
+ * configuration; metrics that need no SM 2.1 specific event list reuse the
+ * sm20_* configurations. ISSUE_SLOTS shares the INST_ISSUED configuration
+ * and ISSUE_SLOT_UTILIZATION shares the ISSUED_IPC one. */
+static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = {
+   _M(ACHIEVED_OCCUPANCY,     &sm20_achieved_occupancy),
+   _M(BRANCH_EFFICIENCY,      &sm20_branch_efficiency),
+   _M(INST_ISSUED,            &sm21_inst_issued),
+   _M(INST_PER_WRAP,          &sm20_inst_per_wrap),
+   _M(INST_REPLAY_OVERHEAD,   &sm21_inst_replay_overhead),
+   _M(ISSUED_IPC,             &sm21_issued_ipc),
+   _M(ISSUE_SLOTS,            &sm21_inst_issued),
+   _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc),
+   _M(IPC,                    &sm20_ipc),
+};
+
+/* The table helper macros are not needed past this point. */
+#undef _SM
+#undef _M
+
+/* Pick the metric configuration table for the screen's chipset: chipsets
+ * 0xc0 and 0xc8 use the SM 2.0 table, every other chipset the SM 2.1 one. */
+static inline const struct nvc0_hw_metric_query_cfg **
+nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
+{
+   switch (screen->base.device->chipset) {
+   case 0xc0:
+   case 0xc8:
+      return sm20_hw_metric_queries;
+   default:
+      return sm21_hw_metric_queries;
+   }
+}
+
+/* Return the configuration of the metric query hq, taken from the table
+ * that matches the context's screen. The table slot is the query type
+ * relative to NVC0_HW_METRIC_QUERY(0); no range check is done here, and the
+ * result is whatever the table holds for that slot (which may be NULL). */
+static const struct nvc0_hw_metric_query_cfg *
+nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
+                             struct nvc0_hw_query *hq)
+{
+   const struct nvc0_hw_metric_query_cfg **table =
+      nvc0_hw_metric_get_queries(nvc0->screen);
+
+   return table[hq->base.type - NVC0_HW_METRIC_QUERY(0)];
+}
+
+/* Destroy a metric query: each underlying query is destroyed through its
+ * own destroy_query hook, in index order, then the metric query itself is
+ * freed. */
+static void
+nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
+                             struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *metric = nvc0_hw_metric_query(hq);
+   unsigned idx = 0;
+
+   while (idx < metric->num_queries) {
+      metric->queries[idx]->funcs->destroy_query(nvc0, metric->queries[idx]);
+      idx++;
+   }
+
+   FREE(metric);
+}
+
+/* Begin every query the metric is built from, in index order.
+ *
+ * Returns false as soon as one underlying begin_query hook fails (the
+ * remaining queries are not started), true when all of them started.
+ * A metric with no underlying queries yields false.
+ *
+ * NOTE(review): when a later query fails, the ones already started are not
+ * ended here -- confirm that this is acceptable for callers.
+ */
+static boolean
+nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   boolean ret = false;
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++) {
+      ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
+      if (!ret)
+         return ret;
+   }
+   return ret;
+}
+
+/* End every query the metric is built from, in index order, through each
+ * query's own end_query hook. */
+static void
+nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
+{
+   struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
+   unsigned i;
+
+   for (i = 0; i < hmq->num_queries; i++)
+      hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
+}
+
+static uint64_t
+sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
+{
+ uint64_t value = 0;
+
+ switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
+ case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
+ /* (active_warps / active_cycles) / max. number of warps on a MP */
+ if (res64[1])
+ value = (res64[0] / (float)res64[1]) / 48;