On Fri, Oct 16, 2015 at 5:35 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 10/16/2015 11:22 PM, Ilia Mirkin wrote: >> >> On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset >> <samuel.pitoi...@gmail.com> wrote: >>> >>> As explained in the CUDA toolkit documentation, "a metric is a >>> characteristic of an application that is calculated from one or more >>> event values." >>> >>> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> >>> --- >>> src/gallium/drivers/nouveau/Makefile.sources | 2 + >>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +- >>> .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 444 >>> +++++++++++++++++++++ >>> .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h | 42 ++ >>> 4 files changed, 504 insertions(+), 3 deletions(-) >>> create mode 100644 >>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c >>> create mode 100644 >>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h >>> >>> diff --git a/src/gallium/drivers/nouveau/Makefile.sources >>> b/src/gallium/drivers/nouveau/Makefile.sources >>> index edc6cf4..c18e9f5 100644 >>> --- a/src/gallium/drivers/nouveau/Makefile.sources >>> +++ b/src/gallium/drivers/nouveau/Makefile.sources >>> @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \ >>> nvc0/nvc0_query.h \ >>> nvc0/nvc0_query_hw.c \ >>> nvc0/nvc0_query_hw.h \ >>> + nvc0/nvc0_query_hw_metric.c \ >>> + nvc0/nvc0_query_hw_metric.h \ >>> nvc0/nvc0_query_hw_sm.c \ >>> nvc0/nvc0_query_hw_sm.h \ >>> nvc0/nvc0_query_sw.c \ >>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c >>> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c >>> index 91254be..90ee82f 100644 >>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c >>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c >>> @@ -25,6 +25,7 @@ >>> >>> #include "nvc0/nvc0_context.h" >>> #include "nvc0/nvc0_query_hw.h" >>> +#include "nvc0/nvc0_query_hw_metric.h" >>> #include "nvc0/nvc0_query_hw_sm.h" >>> >>> #define NVC0_HW_QUERY_STATE_READY 0 >>> @@ -371,6 
+372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0, >>> unsigned type, unsigned index) >>> return (struct nvc0_query *)hq; >>> } >>> >>> + hq = nvc0_hw_metric_create_query(nvc0, type); >>> + if (hq) { >>> + hq->base.funcs = &hw_query_funcs; >>> + return (struct nvc0_query *)hq; >>> + } >>> + >>> hq = CALLOC_STRUCT(nvc0_hw_query); >>> if (!hq) >>> return NULL; >>> @@ -435,14 +442,20 @@ int >>> nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id, >>> struct pipe_driver_query_info *info) >>> { >>> - int num_hw_sm_queries = 0; >>> + int num_hw_sm_queries = 0, num_hw_metric_queries = 0; >>> >>> num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0, >>> NULL); >>> + num_hw_metric_queries = >>> + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL); >>> >>> if (!info) >>> - return num_hw_sm_queries; >>> + return num_hw_sm_queries + num_hw_metric_queries; >>> + >>> + if (id < num_hw_sm_queries) >>> + return nvc0_hw_sm_get_driver_query_info(screen, id, info); >>> >>> - return nvc0_hw_sm_get_driver_query_info(screen, id, info); >>> + return nvc0_hw_metric_get_driver_query_info(screen, >>> + id - num_hw_sm_queries, >>> info); >>> } >>> >>> void >>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c >>> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c >>> new file mode 100644 >>> index 0000000..dbe350a >>> --- /dev/null >>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c >>> @@ -0,0 +1,444 @@ >>> +/* >>> + * Copyright 2015 Samuel Pitoiset >>> + * >>> + * Permission is hereby granted, free of charge, to any person obtaining >>> a >>> + * copy of this software and associated documentation files (the >>> "Software"), >>> + * to deal in the Software without restriction, including without >>> limitation >>> + * the rights to use, copy, modify, merge, publish, distribute, >>> sublicense, >>> + * and/or sell copies of the Software, and to permit persons to whom the >>> + * Software is furnished to do so, 
subject to the following conditions: >>> + * >>> + * The above copyright notice and this permission notice shall be >>> included in >>> + * all copies or substantial portions of the Software. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>> EXPRESS OR >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>> MERCHANTABILITY, >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >>> SHALL >>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>> + * OTHER DEALINGS IN THE SOFTWARE. >>> + */ >>> + >>> +#include "nvc0/nvc0_context.h" >>> +#include "nvc0/nvc0_query_hw_metric.h" >>> +#include "nvc0/nvc0_query_hw_sm.h" >>> + >>> +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */ >>> +static const char *nvc0_hw_metric_names[] = >>> +{ >>> + "metric-achieved_occupancy", >>> + "metric-branch_efficiency", >>> + "metric-inst_issued", >>> + "metric-inst_per_wrap", >>> + "metric-inst_replay_overhead", >>> + "metric-issued_ipc", >>> + "metric-issue_slots", >>> + "metric-issue_slot_utilization", >>> + "metric-ipc", >>> +}; >>> + >>> +struct nvc0_hw_metric_query_cfg { >>> + uint32_t queries[8]; >>> + uint32_t num_queries; >>> +}; >>> + >>> +#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n) >>> +#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c >>> + >>> +/* ==== Compute capability 2.0 (GF100/GF110) ==== */ >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm20_achieved_occupancy = >>> +{ >>> + .queries[0] = _SM(ACTIVE_WARPS), >>> + .queries[1] = _SM(ACTIVE_CYCLES), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm20_branch_efficiency = >>> +{ >>> + .queries[0] = _SM(BRANCH), >>> + .queries[1] = _SM(DIVERGENT_BRANCH), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct 
nvc0_hw_metric_query_cfg >>> +sm20_inst_per_wrap = >>> +{ >>> + .queries[0] = _SM(INST_EXECUTED), >>> + .queries[1] = _SM(WARPS_LAUNCHED), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm20_inst_replay_overhead = >>> +{ >>> + .queries[0] = _SM(INST_ISSUED), >>> + .queries[1] = _SM(INST_EXECUTED), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm20_issued_ipc = >>> +{ >>> + .queries[0] = _SM(INST_ISSUED), >>> + .queries[1] = _SM(ACTIVE_CYCLES), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm20_ipc = >>> +{ >>> + .queries[0] = _SM(INST_EXECUTED), >>> + .queries[1] = _SM(ACTIVE_CYCLES), >>> + .num_queries = 2, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] = >>> +{ >>> + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), >>> + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), >>> + _M(INST_ISSUED, NULL), >>> + _M(INST_PER_WRAP, &sm20_inst_per_wrap), >>> + _M(INST_REPLAY_OVERHEAD, &sm20_inst_replay_overhead), >>> + _M(ISSUED_IPC, &sm20_issued_ipc), >>> + _M(ISSUE_SLOTS, NULL), >>> + _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc), >>> + _M(IPC, &sm20_ipc), >>> +}; >>> + >>> +/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */ >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm21_inst_issued = >>> +{ >>> + .queries[0] = _SM(INST_ISSUED1_0), >>> + .queries[1] = _SM(INST_ISSUED1_1), >>> + .queries[2] = _SM(INST_ISSUED2_0), >>> + .queries[3] = _SM(INST_ISSUED2_1), >>> + .num_queries = 4, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> +sm21_inst_replay_overhead = >>> +{ >>> + .queries[0] = _SM(INST_ISSUED1_0), >>> + .queries[1] = _SM(INST_ISSUED1_1), >>> + .queries[2] = _SM(INST_ISSUED2_0), >>> + .queries[3] = _SM(INST_ISSUED2_1), >>> + .queries[4] = _SM(INST_EXECUTED), >>> + .num_queries = 5, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg >>> 
+sm21_issued_ipc = >>> +{ >>> + .queries[0] = _SM(INST_ISSUED1_0), >>> + .queries[1] = _SM(INST_ISSUED1_1), >>> + .queries[2] = _SM(INST_ISSUED2_0), >>> + .queries[3] = _SM(INST_ISSUED2_1), >>> + .queries[4] = _SM(ACTIVE_CYCLES), >>> + .num_queries = 5, >>> +}; >>> + >>> +static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] = >>> +{ >>> + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy), >>> + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency), >>> + _M(INST_ISSUED, &sm21_inst_issued), >>> + _M(INST_PER_WRAP, &sm20_inst_per_wrap), >>> + _M(INST_REPLAY_OVERHEAD, &sm21_inst_replay_overhead), >>> + _M(ISSUED_IPC, &sm21_issued_ipc), >>> + _M(ISSUE_SLOTS, &sm21_inst_issued), >>> + _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc), >>> + _M(IPC, &sm20_ipc), >>> +}; >>> + >>> +#undef _SM >>> +#undef _M >>> + >>> +static inline const struct nvc0_hw_metric_query_cfg ** >>> +nvc0_hw_metric_get_queries(struct nvc0_screen *screen) >>> +{ >>> + struct nouveau_device *dev = screen->base.device; >>> + >>> + if (dev->chipset == 0xc0 || dev->chipset == 0xc8) >>> + return sm20_hw_metric_queries; >>> + return sm21_hw_metric_queries; >>> +} >>> + >>> +static const struct nvc0_hw_metric_query_cfg * >>> +nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, >>> + struct nvc0_hw_query *hq) >>> +{ >>> + const struct nvc0_hw_metric_query_cfg **queries; >>> + struct nvc0_screen *screen = nvc0->screen; >>> + struct nvc0_query *q = &hq->base; >>> + >>> + queries = nvc0_hw_metric_get_queries(screen); >>> + return queries[q->type - NVC0_HW_METRIC_QUERY(0)]; >>> +} >>> + >>> +static void >>> +nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0, >>> + struct nvc0_hw_query *hq) >>> +{ >>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); >>> + unsigned i; >>> + >>> + for (i = 0; i < hmq->num_queries; i++) >>> + hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]); >>> + FREE(hmq); >>> +} >>> + >>> +static boolean >>> +nvc0_hw_metric_begin_query(struct 
nvc0_context *nvc0, struct >>> nvc0_hw_query *hq) >>> +{ >>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); >>> + boolean ret = false; >>> + unsigned i; >>> + >>> + for (i = 0; i < hmq->num_queries; i++) { >>> + ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]); >>> + if (!ret) >>> + return ret; >>> + } >>> + return ret; >>> +} >>> + >>> +static void >>> +nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query >>> *hq) >>> +{ >>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq); >>> + unsigned i; >>> + >>> + for (i = 0; i < hmq->num_queries; i++) >>> + hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]); >>> +} >>> + >>> +static uint64_t >>> +sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8]) >>> +{ >>> + uint64_t value = 0; >>> + >>> + switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) { >>> + case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY: >>> + /* (active_warps / active_cycles) / max. number of warps on a MP >>> */ >>> + if (res64[1]) >>> + value = (res64[0] / (float)res64[1]) / 48; >> >> Why isn't this all just "return ...." and then a return 0 at the end? >> i.e. why do you have the value variable? > > > Oh yes! it's better indeed. > >> >> Also I don't know how big these values get, but you might want to use >> doubles instead of floats. > > > The HUD currently only supports 64-bits integer, and float will be enough in > my opinion. >
OK. One issue is that float only accurately represents integers up to 1<<24 (it has a 24-bit significand), so if you do (float)res64[0] / (float)res64[1], and either is outside that range, you'll lose accuracy on the division. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev