break;
}
@@ -300,6 +340,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (q->state != NV50_QUERY_STATE_READY)
nv50_query_update(q);
+ if (q->type >= NV50_HW_PM_QUERY(0) && q->type <= NV50_HW_PM_QUERY_LAST) {
+ return nv50_hw_pm_query_result(nv50, q, wait, result);
+ }
+
if (q->state != NV50_QUERY_STATE_READY) {
if (!wait) {
/* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
@@ -476,6 +520,1018 @@ nva0_so_target_save_offset(struct pipe_context *pipe,
nv50_query_end(pipe, targ->pq);
}
+/* === HARDWARE GLOBAL PERFORMANCE COUNTERS for NV50 === */
+
+struct nv50_hw_pm_source_cfg
+{
+ const char *name;
+ uint64_t value;
+};
+
+struct nv50_hw_pm_signal_cfg
+{
+ const char *name;
+ const struct nv50_hw_pm_source_cfg src[8];
+};
+
+struct nv50_hw_pm_counter_cfg
+{
+ uint16_t logic_op;
+ const struct nv50_hw_pm_signal_cfg sig[4];
+};
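+
+/* The logic_op appears to be a 16-bit truth table over the (up to) four
+ * input signals of a counter slot: e.g. 0xaaaa selects signal 0 alone,
+ * and 0x8888 is signal 0 AND signal 1. This interpretation follows the
+ * usual PCOUNTER logic-op encoding and is an editorial note, not part
+ * of the original patch. */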
+
+enum nv50_hw_pm_query_display
+{
+ NV50_HW_PM_EVENT_DISPLAY_RAW,
+ NV50_HW_PM_EVENT_DISPLAY_RATIO,
+};
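+
+/* How an event is displayed: RAW is the plain counter value, RATIO a
+ * percentage of time (presumably busy cycles vs. total cycles, computed
+ * when the result is read back). */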
+
+enum nv50_hw_pm_query_count
+{
+ NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ NV50_HW_PM_EVENT_COUNT_B4,
+ NV50_HW_PM_EVENT_COUNT_B6,
+};
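+
+/* Counting mode: SIMPLE uses a single counter slot, while B4/B6 appear
+ * to combine four or six signals across two slots (see the queries
+ * below that fill both ctr[1] and ctr[2]). */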
+
+struct nv50_hw_pm_event_cfg
+{
+ const char *name;
+ const char *desc;
+ enum nv50_hw_pm_query_display display;
+ enum nv50_hw_pm_query_count count;
+ uint8_t domain;
+};
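+
+/* Editorial note: in the configurations below, events with domain 1 use
+ * pc01_* signals and events with domain 2 use pc02_* signals, so the
+ * domain field seems to select the PCOUNTER domain the signals live in. */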
+
+struct nv50_hw_pm_query_cfg
+{
+ const struct nv50_hw_pm_event_cfg *event;
+ const struct nv50_hw_pm_counter_cfg ctr[4];
+};
+
+#define SRC(name, val) { name, val }
+#define SIG(name, ...) { name, { __VA_ARGS__ } }
+#define CTR(func, ...) { func, { __VA_ARGS__ } }
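+
+/* For example,
+ *    CTR(0xaaaa, SIG("pc01_gr_idle"))
+ * expands to the initializer
+ *    { 0xaaaa, { { "pc01_gr_idle", {} } } },
+ * i.e. one counter slot driven by a single signal with no source muxes. */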
+
+/*
+ * GPU
+ */
+/* gpu_idle */
+static const struct nv50_hw_pm_event_cfg
+nv50_gpu_idle_event =
+{
+ .name = "gpu_idle",
+ .desc = "The percentage of time the GPU is idle/busy since the
last "
+ "call. Having the GPU idle at all is a waste of
valuable "
+ "resources. You want to balance the GPU and CPU
workloads so "
+ "that no one processor is starved for work. Time
management or "
+ "using multithreading in your application can help
balance CPU "
+ "based tasks (world management, etc.) with the
rendering "
+ "pipeline.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_gpu_idle_query =
+{
+ .event = &nv50_gpu_idle_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_gr_idle")),
+};
+
+/*
+ * INPUT ASSEMBLER
+ */
+/* input_assembler_busy */
+static const struct nv50_hw_pm_event_cfg
+nv50_ia_busy_event =
+{
+ .name = "input_assembler_busy",
+ .desc = "The percentage of time the input assembler unit is
busy. This "
+ "is mainly impacted by both the number of vertices
processed as "
+ "well as the size of the attributes on those vertices.
You can "
+ "optimize this by reducing vertex size as much as
possible and "
+ "using indexed primitives to take advantage of the
vertex cache.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_ia_busy_query =
+{
+ .event = &nv50_ia_busy_event,
+ .ctr[0] = CTR(0xf888, SIG("pc01_vfetch_18",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1)),
+ SIG("pc01_vfetch_17"),
+ SIG("pc01_vfetch_03"),
+ SIG("pc01_vfetch_02")),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nva0_ia_busy_query =
+{
+ .event = &nv50_ia_busy_event,
+ .ctr[0] = CTR(0xf888, SIG("pc01_vfetch_15",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1)),
+ SIG("pc01_vfetch_14"),
+ SIG("pc01_vfetch_03"),
+ SIG("pc01_vfetch_02")),
+};
+
+/* input_assembler_waits_for_fb */
+static const struct nv50_hw_pm_event_cfg
+nv50_ia_waits_for_fb_event =
+{
+ .name = "input_assembler_waits_for_fb",
+ .desc = "This is the amount of time the input assembler unit
was "
+ "waiting for data from the frame buffer unit.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_ia_waits_for_fb_query =
+{
+ .event = &nv50_ia_waits_for_fb_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_vfetch_0e",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1))),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nva0_ia_waits_for_fb_query =
+{
+ .event = &nv50_ia_waits_for_fb_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_vfetch_0b",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1))),
+};
+
+/* vertex_attribute_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_vertex_attr_count_event =
+{
+ .name = "vertex_attribute_count",
+ .desc = "The number of vertex attributes that are fetched and
passed to "
+ "the geometry unit is returned in this counter. A
large number "
+ "of attributes (or unaligned vertices) can hurt vertex
cache "
+ "performance and reduce the overall vertex processing "
+ "capabilities of the pipeline.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_vertex_attr_count_query =
+{
+ .event = &nv50_vertex_attr_count_event,
+ .ctr[0] = CTR(0xf888, SIG("pc01_vfetch_18",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1)),
+ SIG("pc01_vfetch_17"),
+ SIG("pc01_vfetch_03"),
+ SIG("pc01_vfetch_02")),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nva0_vertex_attr_count_query =
+{
+ .event = &nv50_vertex_attr_count_event,
+ .ctr[0] = CTR(0xf888, SIG("pc01_vfetch_15",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x1)),
+ SIG("pc01_vfetch_14"),
+ SIG("pc01_vfetch_03"),
+ SIG("pc01_vfetch_02")),
+};
+
+/*
+ * GEOM
+ */
+/* geom_vertex_in_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_geom_vertex_in_count_event =
+{
+ .name = "geom_vertex_in_count",
+ .desc = "The number of vertices input to the geom unit.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_B4,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_geom_vertex_in_count_query =
+{
+ .event = &nv50_geom_vertex_in_count_event,
+ .ctr[1] = CTR(0xffff, SIG("pc01_vfetch_0e",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x0)),
+ SIG("pc01_vfetch_0f"),
+ SIG("pc01_vfetch_10"),
+ SIG("pc01_trailer")),
+ .ctr[2] = CTR(0x5555, SIG("pc01_trailer"),
+ SIG("pc01_trailer"),
+ SIG("pc01_trailer"),
+ SIG("pc01_trailer")),
+};
+
+/* geom_vertex_out_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_geom_vertex_out_count_event =
+{
+ .name = "geom_vertex_out_count",
+ .desc = "The number of vertices coming out of the geom unit
after any "
+ "geometry shader expansion.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_geom_vertex_out_count_query =
+{
+ .event = &nv50_geom_vertex_out_count_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_vattr_01")),
+};
+
+/* geom_primitive_in_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_geom_primitive_in_count_event =
+{
+ .name = "geom_primitive_in_count",
+ .desc = "The number of primitives input to the geom unit.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_geom_primitive_in_count_query =
+{
+ .event = &nv50_geom_primitive_in_count_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_vfetch_08",
+ SRC("pgraph_vfetch_unk0c_unk0", 0x0))),
+};
+
+/* geom_primitive_out_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_geom_primitive_out_count_event =
+{
+ .name = "geom_primitive_out_count",
+ .desc = "The number of primitives coming out the geom unit
after any "
+ "geometry shader expansion.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_geom_primitive_out_count_query =
+{
+ .event = &nv50_geom_primitive_out_count_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_vattr_00")),
+};
+
+/*
+ * STREAM OUT
+ */
+/* stream_out_busy */
+static const struct nv50_hw_pm_event_cfg
+nv50_so_busy_event =
+{
+ .name = "stream_out_busy",
+ .desc = "This unit manages the writing of vertices to the
frame buffer "
+ "when using stream out. If a significant number of
vertices are "
+ "written, this can become a bottleneck.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_so_busy_query =
+{
+ .event = &nv50_so_busy_event,
+ .ctr[0] = CTR(0x8888, SIG("pc01_strmout_00"),
+ SIG("pc01_strmout_01")),
+};
+
+/*
+ * SETUP
+ */
+/* setup_primitive_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_setup_primitive_count_event =
+{
+ .name = "setup_primitive_count",
+ .desc = "Returns the number of primitives processed in the
geometry "
+ "subsystem. This experiments counts points, lines and
triangles. "
+ "To count only triangles, use the setup_triangle_count
counter. "
+ "Balance these counts with the number of pixels being
drawn to "
+ "see if you could simplify your geometry and use "
+ "bump/displacement maps, for example.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_setup_primitive_count_query =
+{
+ .event = &nv50_setup_primitive_count_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_trast_00")),
+};
+
+/* setup_point_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_setup_point_count_event =
+{
+ .name = "setup_point_count",
+ .desc = "The number of points seen by the primitive setup unit
(just "
+ "before rasterization).",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_setup_point_count_query =
+{
+ .event = &nv50_setup_point_count_event,
+ .ctr[0] = CTR(0x8080, SIG("pc01_trast_01"),
+ SIG("pc01_trast_04"),
+ SIG("pc01_trast_05")),
+};
+
+/* setup_line_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_setup_line_count_event =
+{
+ .name = "setup_line_count",
+ .desc = "The number of lines seen by the primitive setup unit
(just "
+ "before rasterization).",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_setup_line_count_query =
+{
+ .event = &nv50_setup_line_count_event,
+ .ctr[0] = CTR(0x8080, SIG("pc01_trast_02"),
+ SIG("pc01_trast_04"),
+ SIG("pc01_trast_05")),
+};
+
+/* setup_triangle_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_setup_triangle_count_event =
+{
+ .name = "setup_triangle_count",
+ .desc = "Returns the number of triangles processed in the
geometry "
+ "subsystem.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_setup_triangle_count_query =
+{
+ .event = &nv50_setup_triangle_count_event,
+ .ctr[0] = CTR(0x8080, SIG("pc01_trast_03"),
+ SIG("pc01_trast_04"),
+ SIG("pc01_trast_05")),
+};
+
+/* setup_primitive_culled_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_setup_primitive_culled_count_event =
+{
+ .name = "setup_primitive_culled_count",
+ .desc = "Returns the number of primitives culled in primitive
setup. If "
+ "you are performing viewport culling, this gives you an "
+ "indication of the accuracy of the algorithm being
used, and can "
+ "give you and idea if you need to improves this
culling. This "
+ "includes primitives culled when using backface
culling. Drawing "
+ "a fully visible sphere on the screen should cull half
of the "
+ "triangles if backface culling is turned on and all the "
+ "triangles are ordered consistently (CW or CCW).",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_setup_primitive_culled_count_query =
+{
+ .event = &nv50_setup_primitive_culled_count_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc01_unk00")),
+};
+
+/*
+ * RASTERIZER
+ */
+/* rast_tiles_killed_by_zcull_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_rast_tiles_killed_by_zcull_event =
+{
+ .name = "rasterizer_tiles_killed_by_zcull_count",
+ .desc = "The number of pixels killed by the zcull unit in the
rasterizer.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_B6,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rast_tiles_killed_by_zcull_query =
+{
+ .event = &nv50_rast_tiles_killed_by_zcull_event,
+ .ctr[1] = CTR(0xffff, SIG("pc01_zcull_00",
+ SRC("pgraph_zcull_pm_unka4_unk0", 0x7)),
+ SIG("pc01_zcull_01"),
+ SIG("pc01_zcull_02"),
+ SIG("pc01_zcull_03")),
+ .ctr[2] = CTR(0x5555, SIG("pc01_trailer"),
+ SIG("pc01_trailer"),
+ SIG("pc01_zcull_04"),
+ SIG("pc01_zcull_05")),
+};
+
+/* rast_tiles_in_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_rast_tiles_in_count_event =
+{
+ .name = "rasterizer_tiles_in_count",
+ .desc = "Count of tiles (each of which contain 1-8 pixels)
seen by the "
+ "rasterizer stage.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_B6,
+ .domain = 1,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rast_tiles_in_count_query =
+{
+ .event = &nv50_rast_tiles_in_count_event,
+ .ctr[1] = CTR(0xffff, SIG("pc01_zcull_00",
+ SRC("pgraph_zcull_pm_unka4_unk0", 0x0)),
+ SIG("pc01_zcull_01"),
+ SIG("pc01_zcull_02"),
+ SIG("pc01_zcull_03")),
+ .ctr[2] = CTR(0x5555, SIG("pc01_trailer"),
+ SIG("pc01_trailer"),
+ SIG("pc01_zcull_04"),
+ SIG("pc01_zcull_05")),
+};
+
+/*
+ * ROP
+ */
+/* rop_busy */
+static const struct nv50_hw_pm_event_cfg
+nv50_rop_busy_event =
+{
+ .name = "rop_busy",
+ .desc = "Percentage of time that the ROP unit is actively
doing work. "
+ "This can be high if alpha blending is turned on, of
overdraw "
+ "is high, etc.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rop_busy_query =
+{
+ .event = &nv50_rop_busy_event,
+ .ctr[0] = CTR(0xf888, SIG("pc02_prop_02",
+ SRC("pgraph_tpc0_prop_pm_mux_sel", 0x0)),
+ SIG("pc02_prop_03"),
+ SIG("pc02_prop_04"),
+ SIG("pc02_prop_05")),
+};
+
+/* rop_waits_for_fb */
+static const struct nv50_hw_pm_event_cfg
+nv50_rop_waits_for_fb_event =
+{
+ .name = "rop_waits_for_fb",
+ .desc = "The amount of time the blending unit spent waiting
for data "
+ "from the frame buffer unit. If blending is enabled
and there "
+ "is a lot of traffic here (since this is a
read/modify/write "
+ "operation) this can become a bottleneck.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rop_waits_for_fb_query =
+{
+ .event = &nv50_rop_waits_for_fb_event,
+ .ctr[0] = CTR(0x22f2, SIG("pc02_crop_03",
+ SRC("pgraph_rop0_crop_pm_mux_sel0", 0x0)),
+ SIG("pc02_crop_02"),
+ SIG("pc02_zrop_03",
+ SRC("pgraph_rop0_zrop_pm_mux_sel0", 0x0)),
+ SIG("pc02_zrop_02")),
+};
+
+/* rop_waits_for_shader */
+static const struct nv50_hw_pm_event_cfg
+nv50_rop_waits_for_shader_event =
+{
+ .name = "rop_waits_for_shader",
+ .desc = "This is a measurement of how often the blending unit
was "
+ "waiting on new work (fragments to be placed into the
render "
+ "target). If the pixel shaders are particularly
expensive, the "
+ "ROP unit could be starved waiting for results.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rop_waits_for_shader_query =
+{
+ .event = &nv50_rop_waits_for_shader_event,
+ .ctr[0] = CTR(0x2222, SIG("pc02_prop_06",
+ SRC("pgraph_tpc0_prop_pm_mux_sel", 0x0)),
+ SIG("pc02_prop_07")),
+};
+
+/* rop_samples_killed_by_earlyz_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_rop_samples_killed_by_earlyz_event =
+{
+ .name = "rop_samples_killed_by_earlyz_count",
+ .desc = "This returns the number of pixels that were killed in
the "
+ "earlyZ hardware. This signal will give you an idea
of, for "
+ "instance, a Z only pass was successful in setting up
the depth "
+ "buffer.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_B6,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rop_samples_killed_by_earlyz_query =
+{
+ .event = &nv50_rop_samples_killed_by_earlyz_event,
+ .ctr[1] = CTR(0xffff, SIG("pc02_prop_00",
+ SRC("pgraph_tpc0_prop_pm_mux_sel", 0x1a)),
+ SIG("pc02_prop_01"),
+ SIG("pc02_prop_02"),
+ SIG("pc02_prop_03")),
+ .ctr[2] = CTR(0x5555, SIG("pc02_prop_07"),
+ SIG("pc02_trailer"),
+ SIG("pc02_prop_04"),
+ SIG("pc02_prop_05")),
+};
+
+/* rop_samples_killed_by_latez_count */
+static const struct nv50_hw_pm_event_cfg
+nv50_rop_samples_killed_by_latez_event =
+{
+ .name = "rop_samples_killed_by_latez_count",
+ .desc = "This returns the number of pixels that were killed
after the "
+ "pixel shader ran. This can happen if the early Z is
unable to "
+ "cull the pixel because of an API setup issue like
changing the "
+ "Z direction or modifying Z in the pixel shader.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_B6,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_rop_samples_killed_by_latez_query =
+{
+ .event = &nv50_rop_samples_killed_by_latez_event,
+ .ctr[1] = CTR(0xffff, SIG("pc02_prop_00",
+ SRC("pgraph_tpc0_prop_pm_mux_sel", 0x1b)),
+ SIG("pc02_prop_01"),
+ SIG("pc02_prop_02"),
+ SIG("pc02_prop_03")),
+ .ctr[2] = CTR(0x5555, SIG("pc02_prop_07"),
+ SIG("pc02_trailer"),
+ SIG("pc02_prop_04"),
+ SIG("pc02_prop_05")),
+};
+
+/*
+ * TEXTURE
+ */
+/* tex_cache_miss */
+static const struct nv50_hw_pm_event_cfg
+nv50_tex_cache_miss_event =
+{
+ .name = "tex_cache_miss",
+ .desc = "Number of texture cache misses.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_tex_cache_miss_query =
+{
+ .event = &nv50_tex_cache_miss_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_04",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x200))),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv84_tex_cache_miss_query =
+{
+ .event = &nv50_tex_cache_miss_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_04",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x800))),
+};
+
+/* tex_cache_hit */
+static const struct nv50_hw_pm_event_cfg
+nv50_tex_cache_hit_event =
+{
+ .name = "tex_cache_hit",
+ .desc = "Number of texture cache hits.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RAW,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_tex_cache_hit_query =
+{
+ .event = &nv50_tex_cache_hit_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_05",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x200))),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv84_tex_cache_hit_query =
+{
+ .event = &nv50_tex_cache_hit_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_05",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x800))),
+};
+
+/* tex_waits_for_fb */
+static const struct nv50_hw_pm_event_cfg
+nv50_tex_waits_for_fb_event =
+{
+ .name = "tex_waits_for_fb",
+ .desc = "This is the amount of time the texture unit spent
waiting on "
+ "samples to return from the frame buffer unit. It is a
potential "
+ "indication of poor texture cache utilization.",
+ .display = NV50_HW_PM_EVENT_DISPLAY_RATIO,
+ .count = NV50_HW_PM_EVENT_COUNT_SIMPLE,
+ .domain = 2,
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv50_tex_waits_for_fb_query =
+{
+ .event = &nv50_tex_waits_for_fb_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_06",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x200))),
+};
+
+static const struct nv50_hw_pm_query_cfg
+nv84_tex_waits_for_fb_query =
+{
+ .event = &nv50_tex_waits_for_fb_event,
+ .ctr[0] = CTR(0xaaaa, SIG("pc02_tex_06",
+ SRC("pgraph_tpc0_tex_unk08_unk0",
0x800))),
+};
+
+static const struct nv50_hw_pm_query_cfg
+*nv50_hw_pm_queries[NV50_HW_PM_QUERY_COUNT];
+
+#define _Q(n, q) nv50_hw_pm_queries[NV50_HW_PM_QUERY_##n] = &q;
+
+static void
+nv50_identify_events(struct nv50_screen *screen)
+{
+ _Q(GPU_IDLE, nv50_gpu_idle_query);
+ _Q(IA_BUSY, nv50_ia_busy_query);
+ _Q(IA_WAITS_FOR_FB, nv50_ia_waits_for_fb_query);
+ _Q(VERTEX_ATTR_COUNT, nv50_vertex_attr_count_query);
+ _Q(GEOM_VERTEX_IN_COUNT, nv50_geom_vertex_in_count_query);
+ _Q(GEOM_VERTEX_OUT_COUNT, nv50_geom_vertex_out_count_query);
+ _Q(GEOM_PRIMITIVE_IN_COUNT, nv50_geom_primitive_in_count_query);
+ _Q(GEOM_PRIMITIVE_OUT_COUNT, nv50_geom_primitive_out_count_query);
+ _Q(SO_BUSY, nv50_so_busy_query);
+ _Q(SETUP_PRIMITIVE_COUNT, nv50_setup_primitive_count_query);
+ _Q(SETUP_POINT_COUNT, nv50_setup_point_count_query);
+ _Q(SETUP_LINE_COUNT, nv50_setup_line_count_query);
+ _Q(SETUP_TRIANGLE_COUNT, nv50_setup_triangle_count_query);
+ _Q(SETUP_PRIMITIVE_CULLED_COUNT, nv50_setup_primitive_culled_count_query);
+ _Q(RAST_TILES_KILLED_BY_ZCULL, nv50_rast_tiles_killed_by_zcull_query);
+ _Q(RAST_TILES_IN_COUNT, nv50_rast_tiles_in_count_query);
+ _Q(ROP_BUSY, nv50_rop_busy_query);
+ _Q(ROP_WAITS_FOR_FB, nv50_rop_waits_for_fb_query);
+ _Q(ROP_WAITS_FOR_SHADER, nv50_rop_waits_for_shader_query);
+ _Q(ROP_SAMPLES_KILLED_BY_EARLYZ, nv50_rop_samples_killed_by_earlyz_query);
+ _Q(ROP_SAMPLES_KILLED_BY_LATEZ, nv50_rop_samples_killed_by_latez_query);
+ _Q(TEX_CACHE_MISS, nv50_tex_cache_miss_query);
+ _Q(TEX_CACHE_HIT, nv50_tex_cache_hit_query);
+ _Q(TEX_WAITS_FOR_FB, nv50_tex_waits_for_fb_query);
+
+ if (screen->base.class_3d >= NV84_3D_CLASS) {
+ /* Variants for NV84+ */
+ _Q(TEX_CACHE_MISS, nv84_tex_cache_miss_query);
+ _Q(TEX_CACHE_HIT, nv84_tex_cache_hit_query);
+ _Q(TEX_WAITS_FOR_FB, nv84_tex_waits_for_fb_query);
+ }
+
+ if (screen->base.class_3d >= NVA0_3D_CLASS) {
+ /* Variants for NVA0+ */
+ _Q(IA_BUSY, nva0_ia_busy_query);
+ _Q(IA_WAITS_FOR_FB, nva0_ia_waits_for_fb_query);
+ _Q(VERTEX_ATTR_COUNT, nva0_vertex_attr_count_query);
+ }
+}
+
+#undef _Q
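+
+/*
+ * Hypothetical helper (not part of this series) showing how the table
+ * above would be consulted, assuming NV50_HW_PM_QUERY(0) maps to the
+ * first entry of nv50_hw_pm_queries:
+ *
+ *    static const struct nv50_hw_pm_query_cfg *
+ *    nv50_hw_pm_query_get_cfg(struct nv50_query *q)
+ *    {
+ *       return nv50_hw_pm_queries[q->type - NV50_HW_PM_QUERY(0)];
+ *    }
+ */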
+
+#ifdef DEBUG