git: 18054d0220cf - main - libpmc: Another update of x86 event definitions.

Alexander Motin Thu, 26 May 2022 19:09:07 -0700

The branch main has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=18054d0220cfc8df9c9568c437bd6fbb59d53c3c


commit 18054d0220cfc8df9c9568c437bd6fbb59d53c3c
Author:     Alexander Motin <m...@freebsd.org>
AuthorDate: 2022-05-27 02:07:42 +0000
Commit:     Alexander Motin <m...@freebsd.org>
CommitDate: 2022-05-27 02:07:42 +0000

    libpmc: Another update of x86 event definitions.
    
    MFC after:      1 month
---
 .../pmu-events/arch/x86/alderlake/adl-metrics.json |  729 ++
 .../pmu-events/arch/x86/alderlake/cache.json       | 1140 +++
 .../arch/x86/alderlake/floating-point.json         |  158 +
 .../pmu-events/arch/x86/alderlake/frontend.json    |  491 ++
 .../pmu-events/arch/x86/alderlake/memory.json      |  318 +
 .../pmu-events/arch/x86/alderlake/other.json       |  146 +
 .../pmu-events/arch/x86/alderlake/pipeline.json    | 1721 +++++
 .../arch/x86/alderlake/uncore-memory.json          |  222 +
 .../arch/x86/alderlake/uncore-other.json           |   40 +
 .../arch/x86/alderlake/virtual-memory.json         |  258 +
 lib/libpmc/pmu-events/arch/x86/bonnell/cache.json  |  748 +-
 .../arch/x86/bonnell/floating-point.json           |  274 +-
 .../pmu-events/arch/x86/bonnell/frontend.json      |   96 +-
 lib/libpmc/pmu-events/arch/x86/bonnell/memory.json |  152 +-
 lib/libpmc/pmu-events/arch/x86/bonnell/other.json  |  452 +-
 .../pmu-events/arch/x86/bonnell/pipeline.json      |  402 +-
 .../arch/x86/bonnell/virtual-memory.json           |  126 +-
 .../pmu-events/arch/x86/broadwell/bdw-metrics.json |  353 +-
 .../pmu-events/arch/x86/broadwell/cache.json       | 4713 ++++++------
 .../arch/x86/broadwell/floating-point.json         |  235 +-
 .../pmu-events/arch/x86/broadwell/frontend.json    |  361 +-
 .../pmu-events/arch/x86/broadwell/memory.json      | 4312 +++++------
 .../pmu-events/arch/x86/broadwell/other.json       |   42 +-
 .../pmu-events/arch/x86/broadwell/pipeline.json    | 1903 +++--
 .../arch/x86/broadwell/virtual-memory.json         |  412 +-
 .../arch/x86/broadwellde/bdwde-metrics.json        |  407 +-
 .../pmu-events/arch/x86/broadwellde/cache.json     | 1122 +--
 .../arch/x86/broadwellde/floating-point.json       |  222 +-
 .../pmu-events/arch/x86/broadwellde/frontend.json  |  335 +-
 .../pmu-events/arch/x86/broadwellde/memory.json    |  608 +-
 .../pmu-events/arch/x86/broadwellde/other.json     |   28 +-
 .../pmu-events/arch/x86/broadwellde/pipeline.json  | 1892 +++--
 .../arch/x86/broadwellde/virtual-memory.json       |  394 +-
 .../arch/x86/broadwellx/bdx-metrics.json           |  351 +-
 .../pmu-events/arch/x86/broadwellx/cache.json      | 1300 ++--
 .../arch/x86/broadwellx/floating-point.json        |  224 +-
 .../pmu-events/arch/x86/broadwellx/frontend.json   |  335 +-
 .../pmu-events/arch/x86/broadwellx/memory.json     |  974 +--
 .../pmu-events/arch/x86/broadwellx/other.json      |   28 +-
 .../pmu-events/arch/x86/broadwellx/pipeline.json   | 1891 +++--
 .../arch/x86/broadwellx/virtual-memory.json        |  394 +-
 .../pmu-events/arch/x86/cascadelakex/cache.json    | 7817 +++++++++++++++++---
 .../arch/x86/cascadelakex/clx-metrics.json         |  469 +-
 .../arch/x86/cascadelakex/floating-point.json      |   50 +-
 .../pmu-events/arch/x86/cascadelakex/frontend.json |   18 +-
 .../pmu-events/arch/x86/cascadelakex/memory.json   | 1710 ++---
 .../pmu-events/arch/x86/cascadelakex/other.json    | 7618 ++-----------------
 .../pmu-events/arch/x86/cascadelakex/pipeline.json |   25 +-
 .../arch/x86/cascadelakex/uncore-memory.json       |   61 +
 .../arch/x86/cascadelakex/uncore-other.json        |  131 +-
 .../pmu-events/arch/x86/elkhartlake/other.json     |  254 +-
 .../pmu-events/arch/x86/elkhartlake/pipeline.json  |  254 +-
 lib/libpmc/pmu-events/arch/x86/goldmont/cache.json | 1466 ++--
 .../arch/x86/goldmont/floating-point.json          |   33 +
 .../pmu-events/arch/x86/goldmont/frontend.json     |   78 +-
 .../pmu-events/arch/x86/goldmont/memory.json       |   38 +-
 lib/libpmc/pmu-events/arch/x86/goldmont/other.json |   81 +-
 .../pmu-events/arch/x86/goldmont/pipeline.json     |  553 +-
 .../arch/x86/goldmont/virtual-memory.json          |   94 +-
 .../pmu-events/arch/x86/goldmontplus/cache.json    | 1730 +++--
 .../arch/x86/goldmontplus/floating-point.json      |   38 +
 .../pmu-events/arch/x86/goldmontplus/frontend.json |   88 +-
 .../pmu-events/arch/x86/goldmontplus/memory.json   |   44 +-
 .../pmu-events/arch/x86/goldmontplus/other.json    |   93 +-
 .../pmu-events/arch/x86/goldmontplus/pipeline.json |  631 +-
 .../arch/x86/goldmontplus/virtual-memory.json      |  214 +-
 lib/libpmc/pmu-events/arch/x86/haswell/cache.json  | 1446 ++--
 .../arch/x86/haswell/floating-point.json           |  129 +-
 .../pmu-events/arch/x86/haswell/frontend.json      |  362 +-
 .../pmu-events/arch/x86/haswell/hsw-metrics.json   |  265 +-
 lib/libpmc/pmu-events/arch/x86/haswell/memory.json | 1004 +--
 lib/libpmc/pmu-events/arch/x86/haswell/other.json  |   40 +-
 .../pmu-events/arch/x86/haswell/pipeline.json      | 1796 +++--
 .../pmu-events/arch/x86/haswell/uncore-cache.json  |  252 +
 .../pmu-events/arch/x86/haswell/uncore-other.json  |   69 +
 .../arch/x86/haswell/virtual-memory.json           |  552 +-
 lib/libpmc/pmu-events/arch/x86/haswellx/cache.json | 1434 ++--
 .../arch/x86/haswellx/floating-point.json          |  116 +-
 .../pmu-events/arch/x86/haswellx/frontend.json     |  336 +-
 .../pmu-events/arch/x86/haswellx/hsx-metrics.json  |  263 +-
 .../pmu-events/arch/x86/haswellx/memory.json       | 1070 +--
 lib/libpmc/pmu-events/arch/x86/haswellx/other.json |   28 +-
 .../pmu-events/arch/x86/haswellx/pipeline.json     | 1763 +++--
 .../arch/x86/haswellx/virtual-memory.json          |  512 +-
 lib/libpmc/pmu-events/arch/x86/icelake/cache.json  | 1284 +++-
 .../arch/x86/icelake/floating-point.json           |   69 +-
 .../pmu-events/arch/x86/icelake/frontend.json      |  449 +-
 .../pmu-events/arch/x86/icelake/icl-metrics.json   |  358 +-
 lib/libpmc/pmu-events/arch/x86/icelake/memory.json |  591 +-
 lib/libpmc/pmu-events/arch/x86/icelake/other.json  |  886 +--
 .../pmu-events/arch/x86/icelake/pipeline.json      | 1128 +--
 .../arch/x86/icelake/virtual-memory.json           |  178 +-
 lib/libpmc/pmu-events/arch/x86/icelakex/cache.json | 1110 ++-
 .../arch/x86/icelakex/floating-point.json          |   51 +-
 .../pmu-events/arch/x86/icelakex/frontend.json     |  501 +-
 .../pmu-events/arch/x86/icelakex/icx-metrics.json  |  280 +-
 .../pmu-events/arch/x86/icelakex/memory.json       |  596 +-
 lib/libpmc/pmu-events/arch/x86/icelakex/other.json |  527 +-
 .../pmu-events/arch/x86/icelakex/pipeline.json     | 1149 +--
 .../pmu-events/arch/x86/icelakex/uncore-other.json |   61 +-
 .../arch/x86/icelakex/virtual-memory.json          |  150 +-
 .../pmu-events/arch/x86/ivybridge/cache.json       | 1446 ++--
 .../arch/x86/ivybridge/floating-point.json         |  212 +-
 .../pmu-events/arch/x86/ivybridge/frontend.json    |  386 +-
 .../pmu-events/arch/x86/ivybridge/ivb-metrics.json |  287 +-
 .../pmu-events/arch/x86/ivybridge/memory.json      |  290 +-
 .../pmu-events/arch/x86/ivybridge/other.json       |   42 +-
 .../pmu-events/arch/x86/ivybridge/pipeline.json    | 1769 +++--
 .../arch/x86/ivybridge/uncore-cache.json           |  252 +
 .../arch/x86/ivybridge/uncore-other.json           |   91 +
 .../arch/x86/ivybridge/virtual-memory.json         |  208 +-
 lib/libpmc/pmu-events/arch/x86/ivytown/cache.json  | 1594 ++--
 .../arch/x86/ivytown/floating-point.json           |  212 +-
 .../pmu-events/arch/x86/ivytown/frontend.json      |  386 +-
 .../pmu-events/arch/x86/ivytown/ivt-metrics.json   |  277 +-
 lib/libpmc/pmu-events/arch/x86/ivytown/memory.json |  562 +-
 lib/libpmc/pmu-events/arch/x86/ivytown/other.json  |   42 +-
 .../pmu-events/arch/x86/ivytown/pipeline.json      | 1771 +++--
 .../pmu-events/arch/x86/ivytown/uncore-memory.json |    3 +-
 .../arch/x86/ivytown/virtual-memory.json           |  232 +-
 lib/libpmc/pmu-events/arch/x86/jaketown/cache.json | 1582 ++--
 .../arch/x86/jaketown/floating-point.json          |  160 +-
 .../pmu-events/arch/x86/jaketown/frontend.json     |  363 +-
 .../pmu-events/arch/x86/jaketown/jkt-metrics.json  |  140 +-
 .../pmu-events/arch/x86/jaketown/memory.json       |  478 +-
 lib/libpmc/pmu-events/arch/x86/jaketown/other.json |   58 +-
 .../pmu-events/arch/x86/jaketown/pipeline.json     | 1556 ++--
 .../arch/x86/jaketown/virtual-memory.json          |  178 +-
 .../pmu-events/arch/x86/knightslanding/cache.json  | 2602 ++++---
 .../arch/x86/knightslanding/floating-point.json    |   29 +
 .../arch/x86/knightslanding/frontend.json          |   48 +-
 .../pmu-events/arch/x86/knightslanding/memory.json | 1226 +--
 .../arch/x86/knightslanding/pipeline.json          |  465 +-
 .../arch/x86/knightslanding/virtual-memory.json    |   68 +-
 lib/libpmc/pmu-events/arch/x86/mapfile.csv         |    3 +
 .../pmu-events/arch/x86/nehalemep/cache.json       | 3062 ++++----
 .../arch/x86/nehalemep/floating-point.json         |  180 +-
 .../pmu-events/arch/x86/nehalemep/frontend.json    |   18 +-
 .../pmu-events/arch/x86/nehalemep/memory.json      |  670 +-
 .../pmu-events/arch/x86/nehalemep/other.json       |  170 +-
 .../pmu-events/arch/x86/nehalemep/pipeline.json    |  830 ++-
 .../arch/x86/nehalemep/virtual-memory.json         |   90 +-
 .../pmu-events/arch/x86/sandybridge/cache.json     | 2298 +++---
 .../arch/x86/sandybridge/floating-point.json       |  172 +-
 .../pmu-events/arch/x86/sandybridge/frontend.json  |  365 +-
 .../pmu-events/arch/x86/sandybridge/memory.json    |  520 +-
 .../pmu-events/arch/x86/sandybridge/other.json     |   66 +-
 .../pmu-events/arch/x86/sandybridge/pipeline.json  | 1634 ++--
 .../arch/x86/sandybridge/snb-metrics.json          |  150 +-
 .../arch/x86/sandybridge/uncore-cache.json         |  252 +
 .../arch/x86/sandybridge/uncore-other.json         |   91 +
 .../arch/x86/sandybridge/virtual-memory.json       |  160 +-
 .../pmu-events/arch/x86/sapphirerapids/cache.json  | 1083 +++
 .../arch/x86/sapphirerapids/floating-point.json    |  218 +
 .../arch/x86/sapphirerapids/frontend.json          |  471 ++
 .../pmu-events/arch/x86/sapphirerapids/memory.json |  415 ++
 .../pmu-events/arch/x86/sapphirerapids/other.json  |  362 +
 .../arch/x86/sapphirerapids/pipeline.json          | 1283 ++++
 .../arch/x86/sapphirerapids/uncore-memory.json     |  499 ++
 .../arch/x86/sapphirerapids/uncore-other.json      | 5150 +++++++++++++
 .../arch/x86/sapphirerapids/uncore-power.json      |   12 +
 .../arch/x86/sapphirerapids/virtual-memory.json    |  225 +
 .../pmu-events/arch/x86/silvermont/cache.json      |  940 ++-
 .../arch/x86/silvermont/floating-point.json        |   11 +
 .../pmu-events/arch/x86/silvermont/frontend.json   |   75 +-
 .../pmu-events/arch/x86/silvermont/memory.json     |    8 +-
 .../pmu-events/arch/x86/silvermont/other.json      |   20 +-
 .../pmu-events/arch/x86/silvermont/pipeline.json   |  422 +-
 .../arch/x86/silvermont/virtual-memory.json        |   76 +-
 lib/libpmc/pmu-events/arch/x86/skylake/cache.json  | 2473 +++----
 .../arch/x86/skylake/floating-point.json           |   48 +-
 .../pmu-events/arch/x86/skylake/frontend.json      |  578 +-
 lib/libpmc/pmu-events/arch/x86/skylake/memory.json | 1480 ++--
 lib/libpmc/pmu-events/arch/x86/skylake/other.json  |   36 -
 .../pmu-events/arch/x86/skylake/pipeline.json      | 1067 +--
 .../pmu-events/arch/x86/skylake/skl-metrics.json   |  497 +-
 .../arch/x86/skylake/virtual-memory.json           |  274 +-
 lib/libpmc/pmu-events/arch/x86/skylakex/cache.json |  221 +-
 .../arch/x86/skylakex/floating-point.json          |   24 +-
 .../pmu-events/arch/x86/skylakex/frontend.json     |   18 +-
 .../pmu-events/arch/x86/skylakex/memory.json       |  170 +-
 lib/libpmc/pmu-events/arch/x86/skylakex/other.json |   36 -
 .../pmu-events/arch/x86/skylakex/pipeline.json     |   25 +-
 .../pmu-events/arch/x86/skylakex/skx-metrics.json  |  461 +-
 .../arch/x86/skylakex/uncore-memory.json           |   20 +
 .../pmu-events/arch/x86/skylakex/uncore-other.json |  131 +-
 .../pmu-events/arch/x86/tigerlake/cache.json       |  130 +-
 .../arch/x86/tigerlake/floating-point.json         |   11 +-
 .../pmu-events/arch/x86/tigerlake/frontend.json    |   17 +-
 .../pmu-events/arch/x86/tigerlake/other.json       |  142 +-
 .../pmu-events/arch/x86/tigerlake/pipeline.json    |   93 +-
 lib/libpmc/pmu-events/arch/x86/tremontx/cache.json | 1111 ++-
 .../arch/x86/tremontx/floating-point.json          |   36 +
 .../pmu-events/arch/x86/tremontx/frontend.json     |   97 +-
 .../pmu-events/arch/x86/tremontx/memory.json       |  436 +-
 lib/libpmc/pmu-events/arch/x86/tremontx/other.json |  671 +-
 .../pmu-events/arch/x86/tremontx/pipeline.json     |  672 +-
 .../arch/x86/tremontx/uncore-memory.json           |  178 +-
 .../pmu-events/arch/x86/tremontx/uncore-other.json | 2246 +++++-
 .../arch/x86/tremontx/virtual-memory.json          |  331 +-
 .../pmu-events/arch/x86/westmereep-dp/cache.json   | 2734 +++----
 .../arch/x86/westmereep-dp/floating-point.json     |  180 +-
 .../arch/x86/westmereep-dp/frontend.json           |   18 +-
 .../pmu-events/arch/x86/westmereep-dp/memory.json  |  686 +-
 .../pmu-events/arch/x86/westmereep-dp/other.json   |  252 +-
 .../arch/x86/westmereep-dp/pipeline.json           |  846 ++-
 .../arch/x86/westmereep-dp/virtual-memory.json     |  138 +-
 .../pmu-events/arch/x86/westmereep-sp/cache.json   | 3148 ++++----
 .../arch/x86/westmereep-sp/floating-point.json     |  180 +-
 .../arch/x86/westmereep-sp/frontend.json           |   18 +-
 .../pmu-events/arch/x86/westmereep-sp/memory.json  |  672 +-
 .../pmu-events/arch/x86/westmereep-sp/other.json   |  252 +-
 .../arch/x86/westmereep-sp/pipeline.json           |  846 ++-
 .../arch/x86/westmereep-sp/virtual-memory.json     |  120 +-
 .../pmu-events/arch/x86/westmereex/cache.json      | 3148 ++++----
 .../arch/x86/westmereex/floating-point.json        |  180 +-
 .../pmu-events/arch/x86/westmereex/frontend.json   |   18 +-
 .../pmu-events/arch/x86/westmereex/memory.json     |  678 +-
 .../pmu-events/arch/x86/westmereex/other.json      |  252 +-
 .../pmu-events/arch/x86/westmereex/pipeline.json   |  850 ++-
 .../arch/x86/westmereex/virtual-memory.json        |  138 +-
 lib/libpmc/pmu-events/jevents.c                    |    5 +
 222 files changed, 83686 insertions(+), 61121 deletions(-)

diff --git a/lib/libpmc/pmu-events/arch/x86/alderlake/adl-metrics.json b/lib/libpmc/pmu-events/arch/x86/alderlake/adl-metrics.json
new file mode 100644
index 000000000000..6b24958737b5
--- /dev/null
+++ b/lib/libpmc/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -0,0 +1,729 @@
+[
+    {
+        "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+        "MetricExpr": "100 * (( BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) ) / TOPDOWN.SLOTS)",
+        "MetricGroup": "Ret",
+        "MetricName": "Branching_Overhead",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Ret;Summary",
+        "MetricName": "IPC",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
+        "MetricGroup": "Pipeline;Mem",
+        "MetricName": "CPI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Per-Logical Processor actual clocks when the 
Logical Processor is active.",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "Pipeline",
+        "MetricName": "CLKS",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Total issue-pipeline slots (per-Physical Core 
till ICL; per-Logical Processor ICL onward)",
+        "MetricExpr": "TOPDOWN.SLOTS",
+        "MetricGroup": "TmaL1",
+        "MetricName": "SLOTS",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of Physical Core issue-slots utilized by 
this Logical Processor",
+        "MetricExpr": "TOPDOWN.SLOTS / ( TOPDOWN.SLOTS / 2 ) if #SMT_on else 1",
+        "MetricGroup": "SMT",
+        "MetricName": "Slots_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "The ratio of Executed- by Issued-Uops",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+        "MetricGroup": "Cor;Pipeline",
+        "MetricName": "Execute_per_Issue",
+        "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle across hyper-threads (per 
physical core)",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "Ret;SMT;TmaL1",
+        "MetricName": "CoreIPC",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Floating Point Operations Per Cycle",
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + 
FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * 
FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "Ret;Flops",
+        "MetricName": "FLOPc",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Actual per-core usage of the Floating Point 
execution units (regardless of the vector width)",
+        "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + 
FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + 
(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) ) / ( 2 * 
CPU_CLK_UNHALTED.DISTRIBUTED )",
+        "MetricGroup": "Cor;Flops;HPC",
+        "MetricName": "FP_Arith_Utilization",
+        "PublicDescription": "Actual per-core usage of the Floating Point 
execution units (regardless of the vector width). Values > 1 are possible due 
to Fused-Multiply Add (FMA) counting.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instruction-Level-Parallelism (average number of 
uops executed when there is at least 1 uop executed)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (( 
UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else 
UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
+        "MetricName": "ILP",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch 
Misprediction (JEClear)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;BadSpec;BrMispredicts",
+        "MetricName": "IpMispredict",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Core actual clocks when any Logical Processor is 
active on the Physical Core",
+        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricGroup": "SMT",
+        "MetricName": "CORE_CLKS",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per Load (lower number means higher 
occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "MetricGroup": "InsType",
+        "MetricName": "IpLoad",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per Store (lower number means higher 
occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "MetricGroup": "InsType",
+        "MetricName": "IpStore",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per Branch (lower number means 
higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Branches;Fed;InsType",
+        "MetricName": "IpBranch",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per (near) call (lower number means 
higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "MetricGroup": "Branches;Fed;PGO",
+        "MetricName": "IpCall",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instruction per taken branch",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
+        "MetricName": "IpTB",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Branch instructions per taken branch. ",
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / 
BR_INST_RETIRED.NEAR_TAKEN",
+        "MetricGroup": "Branches;Fed;PGO",
+        "MetricName": "BpTkBranch",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per Floating Point (FP) Operation 
(lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( 
FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 
* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+        "MetricGroup": "Flops;InsType",
+        "MetricName": "IpFLOP",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic instruction (lower 
number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 
(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + 
(FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) )",
+        "MetricGroup": "Flops;InsType",
+        "MetricName": "IpArith",
+        "PublicDescription": "Instructions per FP Arithmetic instruction 
(lower number means higher occurrence rate). May undercount due to FMA double 
counting. Approximated prior to BDW.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic Scalar 
Single-Precision instruction (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+        "MetricGroup": "Flops;FpScalar;InsType",
+        "MetricName": "IpArith_Scalar_SP",
+        "PublicDescription": "Instructions per FP Arithmetic Scalar 
Single-Precision instruction (lower number means higher occurrence rate). May 
undercount due to FMA double counting.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic Scalar 
Double-Precision instruction (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "MetricGroup": "Flops;FpScalar;InsType",
+        "MetricName": "IpArith_Scalar_DP",
+        "PublicDescription": "Instructions per FP Arithmetic Scalar 
Double-Precision instruction (lower number means higher occurrence rate). May 
undercount due to FMA double counting.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit 
instruction (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 
FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "IpArith_AVX128",
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit 
instruction (lower number means higher occurrence rate). May undercount due to 
FMA double counting.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit 
instruction (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / ( 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "IpArith_AVX256",
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit 
instruction (lower number means higher occurrence rate). May undercount due to 
FMA double counting.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Total number of retired Instructions, Sample 
with: INST_RETIRED.PREC_DIST",
+        "MetricExpr": "INST_RETIRED.ANY",
+        "MetricGroup": "Summary;TmaL1",
+        "MetricName": "Instructions",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average number of Uops issued by front-end when 
it issued something",
+        "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@",
+        "MetricGroup": "Fed;FetchBW",
+        "MetricName": "Fetch_UpC",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the LSD (Loop 
Stream Detector; aka Loop Cache)",
+        "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + 
IDQ.MS_UOPS)",
+        "MetricGroup": "Fed;LSD",
+        "MetricName": "LSD_Coverage",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka 
Decoded ICache; or Uop Cache)",
+        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS 
+ IDQ.MS_UOPS)",
+        "MetricGroup": "DSB;Fed;FetchBW",
+        "MetricName": "DSB_Coverage",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative DSB 
miss",
+        "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+        "MetricGroup": "DSBmiss;Fed",
+        "MetricName": "IpDSB_Miss_Ret",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of branches that are non-taken 
conditionals",
+        "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / 
BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;Branches;CodeGen;PGO",
+        "MetricName": "Cond_NT",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of branches that are taken conditionals",
+        "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / 
BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;Branches;CodeGen;PGO",
+        "MetricName": "Cond_TK",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of branches that are CALL or RET",
+        "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + 
BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;Branches",
+        "MetricName": "CallRet",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of branches that are unconditional 
(direct or indirect) jumps",
+        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - 
BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL) / 
BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricGroup": "Bad;Branches",
+        "MetricName": "Jump",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of branches of other types (not 
individually covered by other metrics in Info.Branches group)",
+        "MetricExpr": "1 - ( (BR_INST_RETIRED.COND_NTAKEN / 
BR_INST_RETIRED.ALL_BRANCHES) + (BR_INST_RETIRED.COND_TAKEN / 
BR_INST_RETIRED.ALL_BRANCHES) + (( BR_INST_RETIRED.NEAR_CALL + 
BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES) + 
((BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * 
BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES) )",
+        "MetricGroup": "Bad;Branches",
+        "MetricName": "Other_Branches",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss 
demand load instructions (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + 
MEM_LOAD_RETIRED.FB_HIT )",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "Load_Miss_Real_Latency",
+        "PublicDescription": "Actual Average Latency for L1 data-cache miss 
demand load instructions (in core cycles). Latency may be overestimated for 
multi-load instructions - e.g. repeat strings.",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 
miss demand load when there is at least one such miss. Per-Logical Processor)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBound;MemoryBW",
+        "MetricName": "MLP",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L1 data cache 
[GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L1D_Cache_Fill_BW",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / 
sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L2_Cache_Fill_BW",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 
cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / 
duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "L3_Cache_Fill_BW",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 
cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / 
duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "L3_Cache_Access_BW",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L1 cache true misses per kilo instruction for 
retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L1MPKI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L1 cache true misses per kilo instruction for all 
demand loads (including speculative)",
+        "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L1MPKI_Load",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L2 cache true misses per kilo instruction for 
retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;Backend;CacheMisses",
+        "MetricName": "L2MPKI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L2 cache misses per kilo instruction for all 
request types (including speculative)",
+        "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses;Offcore",
+        "MetricName": "L2MPKI_All",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L2 cache misses per kilo instruction for all 
demand loads  (including speculative)",
+        "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L2MPKI_Load",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L2 cache hits per kilo instruction for all 
request types (including speculative)",
+        "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / 
INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L2HPKI_All",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L2 cache hits per kilo instruction for all demand 
loads  (including speculative)",
+        "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L2HPKI_Load",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "L3 cache true misses per kilo instruction for 
retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "L3MPKI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions 
for retired demand loads",
+        "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;CacheMisses",
+        "MetricName": "FB_HPKI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving 
STLB misses triggered by instruction/Load/Store accesses",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + 
DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING ) / ( 4 * 
CPU_CLK_UNHALTED.DISTRIBUTED )",
+        "MetricGroup": "Mem;MemoryTLB",
+        "MetricName": "Page_Walks_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricGroup": "HPC;Summary",
+        "MetricName": "CPU_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Measured Average Frequency for unhalted 
processors [GHz]",
+        "MetricExpr": "(CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * 
msr@tsc@ / 1000000000 / duration_time",
+        "MetricGroup": "Summary;Power",
+        "MetricName": "Average_Frequency",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Giga Floating Point Operations Per Second",
+        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + 
FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * 
FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( 
FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + 
FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * 
FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
+        "MetricGroup": "Cor;Flops;HPC",
+        "MetricName": "GFLOPs",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal 
frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricGroup": "Power",
+        "MetricName": "Turbo_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of cycles where both hardware Logical 
Processors were active",
+        "MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / 
CPU_CLK_UNHALTED.REF_DISTRIBUTED if #SMT_on else 0",
+        "MetricGroup": "SMT",
+        "MetricName": "SMT_2T_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in the Operating System 
(OS) Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / CPU_CLK_UNHALTED.THREAD",
+        "MetricGroup": "OS",
+        "MetricName": "Kernel_Utilization",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction for the Operating System 
(OS) Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+        "MetricGroup": "OS",
+        "MetricName": "Kernel_CPI",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average external Memory Bandwidth Use for reads 
and writes [GB / sec]",
+        "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + 
arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+        "MetricName": "DRAM_BW_Use",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Average number of parallel requests to external 
memory. Accounts for all requests",
+        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / 
arb@event\\=0x81\\,umask\\=0x1@",
+        "MetricGroup": "Mem;SoC",
+        "MetricName": "MEM_Parallel_Requests",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Instructions per Far Branch ( Far Branches apply 
upon transition from application to operating system, handling interrupts, 
exceptions) [lower number means higher occurrence rate]",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
+        "MetricGroup": "Branches;OS",
+        "MetricName": "IpFarBranch",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not 
consumed by the backend due to frontend stalls.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.ALL / (5 * CPU_CLK_UNHALTED.CORE)",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Frontend_Bound",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were 
not consumed by the backend because allocation is stalled due to a mispredicted 
jump or a machine clear",
+        "MetricExpr": "TOPDOWN_BAD_SPECULATION.ALL / (5 * 
CPU_CLK_UNHALTED.CORE)",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Bad_Speculation",
+        "PublicDescription": "Counts the total number of issue slots that were 
not consumed by the backend because allocation is stalled due to a mispredicted 
jump or a machine clear. Only issue slots wasted due to fast nukes such as 
memory ordering nukes are counted. Other nukes are not accounted for. Counts 
all issue slots blocked during this recovery window including relevant 
microcode flows and while uops are not yet available in the instruction queue 
(IQ). Also includes the issue slots that were consumed by the backend but were 
thrown away because they were younger than the mispredict or machine clear.",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots  that were 
not consumed by the backend due to backend stalls",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricExpr": "TOPDOWN_BE_BOUND.ALL / (5 * CPU_CLK_UNHALTED.CORE)",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Backend_Bound",
+        "PublicDescription": "Counts the total number of issue slots  that 
were not consumed by the backend due to backend stalls.  Note that uops must be 
available for consumption in order for this event to count.  If a uop is not 
available (IQ is empty), this event will not count.   The rest of these 
subevents count backend stalls, in cycles, due to an outstanding request which 
is memory bound vs core bound.   The subevents are not slot based events and 
therefore can not be precisely added or subtracted from the Backend_Bound_Aux 
subevents which are slot based.",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots  that were 
not consumed by the backend due to backend stalls",
+        "MetricExpr": "TOPDOWN_BE_BOUND.ALL / (5 * CPU_CLK_UNHALTED.CORE)",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Backend_Bound_Aux",
+        "PublicDescription": "Counts the total number of issue slots  that 
were not consumed by the backend due to backend stalls.  Note that UOPS must be 
available for consumption in order for this event to count.  If a uop is not 
available (IQ is empty), this event will not count.  All of these subevents 
count backend stalls, in slots, due to a resource limitation.   These are not 
cycle based events and therefore can not be precisely added or subtracted from 
the Backend_Bound subevents which are cycle based.  These subevents are 
supplementary to Backend_Bound and can be used to analyze results from a 
resource perspective at allocation.  ",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that result in 
retirement slots. ",
+        "MetricExpr": "TOPDOWN_RETIRING.ALL / (5 * CPU_CLK_UNHALTED.CORE)",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "Retiring",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE",
+        "MetricName": "CLKS",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
+        "MetricName": "CLKS_P",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "5 * CPU_CLK_UNHALTED.CORE",
+        "MetricName": "SLOTS",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle",
+        "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.CORE",
+        "MetricName": "IPC",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE / INST_RETIRED.ANY",
+        "MetricName": "CPI",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+        "MetricName": "UPI",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads with a 
store forward or unknown store address block",
+        "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / 
MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Store_Fwd_Blocks",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads with an 
address aliasing block",
+        "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Address_Alias_Blocks",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads that 
are splits",
+        "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / 
MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Load_Splits",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions per Branch (lower number means 
higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricName": "IpBranch",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions per (near) call (lower number means 
higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+        "MetricName": "IpCall",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions per Load",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "IpLoad",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions per Store",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+        "MetricName": "IpStore",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch 
Misprediction",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricName": "IpMispredict",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Instructions per Far Branch",
+        "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
+        "MetricName": "IpFarBranch",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Ratio of all branches which mispredict",
+        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / 
BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricName": "Branch_Mispredict_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Ratio between Mispredicted branches and unknown 
branches",
+        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+        "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are ucode ops",
+        "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
+        "MetricName": "Microcode_Uop_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are FPDiv uops",
+        "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
+        "MetricName": "FPDiv_Uop_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are IDiv uops",
+        "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
+        "MetricName": "IDiv_Uop_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are x87 uops",
+        "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
+        "MetricName": "X87_Uop_Ratio",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative to nominal 
frequency",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricName": "Turbo_Utilization",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE:k / CPU_CLK_UNHALTED.CORE",
+        "MetricName": "Kernel_Utilization",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricName": "CPU_Utilization",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Estimated Pause cost. In percent",
+        "MetricExpr": "100 * SERIALIZATION.NON_C01_MS_SCB / ( 5 * 
CPU_CLK_UNHALTED.CORE )",
+        "MetricName": "Estimated_Pause_Cost",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Cycle cost per L2 hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / 
MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "MetricName": "Cycles_per_Demand_Load_L2_Hit",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Cycle cost per LLC hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / 
MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "MetricName": "Cycles_per_Demand_Load_L3_Hit",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Cycle cost per DRAM hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / 
MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "MetricName": "Cycles_per_Demand_Load_DRAM_Hit",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in the 
L2",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / ( 
MEM_BOUND_STALLS.IFETCH )",
+        "MetricName": "Inst_Miss_Cost_L2Hit_Percent",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in the 
L3",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / ( 
MEM_BOUND_STALLS.IFETCH )",
+        "MetricName": "Inst_Miss_Cost_L3Hit_Percent",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in 
DRAM",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / ( 
MEM_BOUND_STALLS.IFETCH )",
+        "MetricName": "Inst_Miss_Cost_DRAMHit_Percent",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "load ops retired per 1000 instructions",
+        "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+        "MetricName": "MemLoadPKI",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "C1 residency percent per core",
+        "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C1_Core_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C8 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c8\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C8_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C9 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c9\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C9_Pkg_Residency"
+    },
+    {
+        "BriefDescription": "C10 residency percent per package",
+        "MetricExpr": "(cstate_pkg@c10\\-residency@ / msr@tsc@) * 100",
+        "MetricGroup": "Power",
+        "MetricName": "C10_Pkg_Residency"
+    }
+]
diff --git a/lib/libpmc/pmu-events/arch/x86/alderlake/cache.json 
b/lib/libpmc/pmu-events/arch/x86/alderlake/cache.json
new file mode 100644
index 000000000000..b83ed129c454
--- /dev/null
+++ b/lib/libpmc/pmu-events/arch/x86/alderlake/cache.json
@@ -0,0 +1,1140 @@
+[
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled 
due to an instruction cache or tlb miss which hit in the L2, LLC, DRAM or MMIO 
(Non-DRAM).",
+        "CollectPEBSRecord": "2",
+        "Counter": "0,1,2,3,4,5",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH",
+        "PEBScounters": "0,1,2,3,4,5",
*** 198608 LINES SKIPPED ***

git: 18054d0220cf - main - libpmc: Another update of x86 event definitions.

Reply via email to