On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > As explained in the CUDA toolkit documentation, "a metric is a > characteristic of an application that is calculated from one or more > event values." > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/gallium/drivers/nouveau/Makefile.sources | 2 + > src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +- > .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 444 > +++++++++++++++++++++ > .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h | 42 ++ > 4 files changed, 504 insertions(+), 3 deletions(-) > create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c > create mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h > > diff --git a/src/gallium/drivers/nouveau/Makefile.sources > b/src/gallium/drivers/nouveau/Makefile.sources > index edc6cf4..c18e9f5 100644 > --- a/src/gallium/drivers/nouveau/Makefile.sources > +++ b/src/gallium/drivers/nouveau/Makefile.sources > @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \ > nvc0/nvc0_query.h \ > nvc0/nvc0_query_hw.c \ > nvc0/nvc0_query_hw.h \ > + nvc0/nvc0_query_hw_metric.c \ > + nvc0/nvc0_query_hw_metric.h \ > nvc0/nvc0_query_hw_sm.c \ > nvc0/nvc0_query_hw_sm.h \ > nvc0/nvc0_query_sw.c \ > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c > index 91254be..90ee82f 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c > @@ -25,6 +25,7 @@ > > #include "nvc0/nvc0_context.h" > #include "nvc0/nvc0_query_hw.h" > +#include "nvc0/nvc0_query_hw_metric.h" > #include "nvc0/nvc0_query_hw_sm.h" > > #define NVC0_HW_QUERY_STATE_READY 0 > @@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, unsigned > type, unsigned index) > return (struct nvc0_query *)hq; > } > > + hq = nvc0_hw_metric_create_query(nvc0, type); > + if (hq) { > + hq->base.funcs = &hw_query_funcs; > + 
return (struct nvc0_query *)hq; > + } > + > hq = CALLOC_STRUCT(nvc0_hw_query); > if (!hq) > return NULL; > @@ -435,14 +442,20 @@ int > nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id, > struct pipe_driver_query_info *info) > { > - int num_hw_sm_queries = 0; > + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; > > num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, NULL); > + num_hw_metric_queries = > + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL); > > if (!info) > - return num_hw_sm_queries; > + return num_hw_sm_queries + num_hw_metric_queries; > + > + if (id < num_hw_sm_queries) > + return nvc0_hw_sm_get_driver_query_info(screen, id, info); > > - return nvc0_hw_sm_get_driver_query_info(screen, id, info); > + return nvc0_hw_metric_get_driver_query_info(screen, > + id - num_hw_sm_queries, info); > } > > void > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c > new file mode 100644 > index 0000000..dbe350a > --- /dev/null > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c > @@ -0,0 +1,444 @@ > +/* > + * Copyright 2015 Samuel Pitoiset > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + */ > + > +#include "nvc0/nvc0_context.h" > +#include "nvc0/nvc0_query_hw_metric.h" > +#include "nvc0/nvc0_query_hw_sm.h" > + > +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ > +static const char *nvc0_hw_metric_names[] = > +{ > + "metric-achieved_occupancy", > + "metric-branch_efficiency", > + "metric-inst_issued", > + "metric-inst_per_wrap", > + "metric-inst_replay_overhead", > + "metric-issued_ipc", > + "metric-issue_slots", > + "metric-issue_slot_utilization", > + "metric-ipc", > +}; > + > +struct nvc0_hw_metric_query_cfg { > + uint32_t queries[8]; > + uint32_t num_queries; > +}; > + > +#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n) > +#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c > + > +/* ==== Compute capability 2.0 (GF100/GF110) ==== */ > +static const struct nvc0_hw_metric_query_cfg > +sm20_achieved_occupancy = > +{ > + .queries[0] = _SM(ACTIVE_WARPS), > + .queries[1] = _SM(ACTIVE_CYCLES), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm20_branch_efficiency = > +{ > + .queries[0] = _SM(BRANCH), > + .queries[1] = _SM(DIVERGENT_BRANCH), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm20_inst_per_wrap = > +{ > + .queries[0] = _SM(INST_EXECUTED), > + .queries[1] = _SM(WARPS_LAUNCHED), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm20_inst_replay_overhead = > +{ > + .queries[0] = _SM(INST_ISSUED), > + .queries[1] = 
_SM(INST_EXECUTED), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm20_issued_ipc = > +{ > + .queries[0] = _SM(INST_ISSUED), > + .queries[1] = _SM(ACTIVE_CYCLES), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm20_ipc = > +{ > + .queries[0] = _SM(INST_EXECUTED), > + .queries[1] = _SM(ACTIVE_CYCLES), > + .num_queries = 2, > +}; > + > +static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] = > +{ > + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), > + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), > + _M(INST_ISSUED, NULL), > + _M(INST_PER_WRAP, &sm20_inst_per_wrap), > + _M(INST_REPLAY_OVERHEAD, &sm20_inst_replay_overhead), > + _M(ISSUED_IPC, &sm20_issued_ipc), > + _M(ISSUE_SLOTS, NULL), > + _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc), > + _M(IPC, &sm20_ipc), > +}; > + > +/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */ > +static const struct nvc0_hw_metric_query_cfg > +sm21_inst_issued = > +{ > + .queries[0] = _SM(INST_ISSUED1_0), > + .queries[1] = _SM(INST_ISSUED1_1), > + .queries[2] = _SM(INST_ISSUED2_0), > + .queries[3] = _SM(INST_ISSUED2_1), > + .num_queries = 4, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm21_inst_replay_overhead = > +{ > + .queries[0] = _SM(INST_ISSUED1_0), > + .queries[1] = _SM(INST_ISSUED1_1), > + .queries[2] = _SM(INST_ISSUED2_0), > + .queries[3] = _SM(INST_ISSUED2_1), > + .queries[4] = _SM(INST_EXECUTED), > + .num_queries = 5, > +}; > + > +static const struct nvc0_hw_metric_query_cfg > +sm21_issued_ipc = > +{ > + .queries[0] = _SM(INST_ISSUED1_0), > + .queries[1] = _SM(INST_ISSUED1_1), > + .queries[2] = _SM(INST_ISSUED2_0), > + .queries[3] = _SM(INST_ISSUED2_1), > + .queries[4] = _SM(ACTIVE_CYCLES), > + .num_queries = 5, > +}; > + > +static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = > +{ > + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), > + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), > + 
_M(INST_ISSUED, &sm21_inst_issued), > + _M(INST_PER_WRAP, &sm20_inst_per_wrap), > + _M(INST_REPLAY_OVERHEAD, &sm21_inst_replay_overhead), > + _M(ISSUED_IPC, &sm21_issued_ipc), > + _M(ISSUE_SLOTS, &sm21_inst_issued), > + _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc), > + _M(IPC, &sm20_ipc), > +}; > + > +#undef _SM > +#undef _M > + > +static inline const struct nvc0_hw_metric_query_cfg ** > +nvc0_hw_metric_get_queries(struct nvc0_screen *screen) > +{ > + struct nouveau_device *dev = screen->base.device; > + > + if (dev->chipset == 0xc0 || dev->chipset == 0xc8) > + return sm20_hw_metric_queries; > + return sm21_hw_metric_queries; > +} > + > +static const struct nvc0_hw_metric_query_cfg * > +nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, > + struct nvc0_hw_query *hq) > +{ > + const struct nvc0_hw_metric_query_cfg **queries; > + struct nvc0_screen *screen = nvc0->screen; > + struct nvc0_query *q = &hq->base; > + > + queries = nvc0_hw_metric_get_queries(screen); > + return queries[q->type - NVC0_HW_METRIC_QUERY(0)]; > +} > + > +static void > +nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0, > + struct nvc0_hw_query *hq) > +{ > + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); > + unsigned i; > + > + for (i = 0; i < hmq->num_queries; i++) > + hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]); > + FREE(hmq); > +} > + > +static boolean > +nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query > *hq) > +{ > + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); > + boolean ret = false; > + unsigned i; > + > + for (i = 0; i < hmq->num_queries; i++) { > + ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]); > + if (!ret) > + return ret; > + } > + return ret; > +} > + > +static void > +nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) > +{ > + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); > + unsigned i; > + > + for (i = 0; i < hmq->num_queries; i++) > + 
hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]); > +} > + > +static uint64_t > +sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) > +{ > + uint64_t value = 0; > + > + switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { > + case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: > + /* (active_warps / active_cycles) / max. number of warps on a MP */ > + if (res64[1]) > + value = (res64[0] / (float)res64[1]) / 48;
Why isn't this all just "return ...;" in each case, followed by a "return 0;" at the end? I.e., why do you need the value variable at all? Also, I don't know how big these values get, but you might want to use doubles instead of floats: a float has only a 24-bit significand, so converting a large uint64_t counter to float silently loses precision. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev