Since there will be a v3 anyway, nits below... On 12/02/2014 06:33 PM, Ben Widawsky wrote: > This patch implements ARB_pipeline_statistics_query. This addition to GL does > not add a new API. Instead, it adds new tokens to the existing query APIs. The > work to hook up the new tokens is trivial due to it's similarity to the > previous > work done for the query APIs. I've implemented all the new tokens to some > degree, but have stubbed out the untested ones at the entry point for Begin(). > Doing this should allow the remainder of the code to be left in. > > The new tokens give GL clients a way to obtain stats about the GL pipeline. > Generally, you get the number of things going in, invocations, and number of > things coming out, primitives, of the various stages. There are two immediate > uses for this, performance information, and debugging various types of > misrendering. I doubt one can use these for debugging very complex > applications, > but for piglit tests, it should be quite useful. > > Tessellation shaders, and compute shaders are not addressed in this patch > because there is no upstream implementation. I've implemented how I believe > tessellation shader stats will work for Intel hardware (though there is a bit > of > ambiguity). Compute shaders are a bit more interesting though, and I don't yet > know what we'll do there.
> > For the lazy, here is a link to the relevant part of the spec: > https://www.opengl.org/registry/specs/ARB/pipeline_statistics_query.txt > > Running the piglit tests > http://lists.freedesktop.org/archives/piglit/2014-November/013321.html > (http://cgit.freedesktop.org/~bwidawsk/piglit/log/?h=pipe_stats) > yield the following results: > >> python2 ./piglit-run.py -t stats tests/all.py output/pipeline_stats >> [5/5] pass: 5 Running Test(s): 5 > > Previously I was seeing the adjacent vertex test failing on certain Intel > hardware. I am currently not able to reproduce this, and therefore for now, > I'll > assume it was some transient issue which has been fixed. > > v2: > - Don't allow pipeline_stats to be per stream (Ilia). This would be needed for > AMD_transform_feedback4, which we do not support. > > If AMD_transform_feedback4 is supported then GEOMETRY_SHADER_PRIMITIVES_- > > EMITTED_ARB counts primitives emitted to any of the vertex streams for > which > > STREAM_RASTERIZATION_AMD is enabled. > - Remove comment from GL3.txt because it is only used for extensions that are > part of required versions (Ilia) > - Move the new tokens to a new XML doc instead of using the main GL4x.xml > (Ilia) > - Add a fallthrough comment (Ilia) > - Only divide PS invocations by 4 on HSW+ (Ben) > > Cc: Ilia Mirkin <imir...@alum.mit.edu> > Signed-off-by: Ben Widawsky <b...@bwidawsk.net> > --- > .../glapi/gen/ARB_pipeline_statistics_query.xml | 24 ++++ > src/mesa/drivers/dri/i965/gen6_queryobj.c | 121 > +++++++++++++++++++++ > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > src/mesa/main/config.h | 3 + > src/mesa/main/extensions.c | 1 + > src/mesa/main/mtypes.h | 15 +++ > src/mesa/main/queryobj.c | 77 +++++++++++++ > 7 files changed, 242 insertions(+) > create mode 100644 src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml > > diff --git a/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml > b/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml > new file mode 100644 > index 0000000..db37267 > --- /dev/null > +++ b/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml > @@ -0,0 +1,24 @@ > +<?xml version="1.0"?> > +<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd"> > + > +<!-- Note: no GLX protocol info yet. --> > + > +<OpenGLAPI> > + > +<category name="GL_ARB_pipeline_statistics_query" number="100"> > + > + <enum name="VERTICES_SUBMITTED" value="0x82EE"/> > + <enum name="PRIMITIVES_SUBMITTED" value="0x82EF"/> > + <enum name="VERTEX_SHADER_INVOCATIONS" value="0x82F0"/> > + <enum name="TESS_CONTROL_SHADER_PATCHES" value="0x82F1"/> > + <enum name="TESS_EVALUATION_SHADER_INVOCATIONS" value="0x82F2"/> > + <!-- <enum name="GEOMETRY_SHADER_INVOCATIONS" value="0x887F"/> > --> > + <enum name="GEOMETRY_SHADER_PRIMITIVES_EMITTED" value="0x82F3"/> > + <enum name="FRAGMENT_SHADER_INVOCATIONS" value="0x82F4"/> > + <enum name="COMPUTE_SHADER_INVOCATIONS" value="0x82F5"/> > + <enum name="CLIPPING_INPUT_PRIMITIVES" value="0x82F6"/> > + <enum name="CLIPPING_OUTPUT_PRIMITIVES" value="0x82F7"/> > + > +</category> > + > +</OpenGLAPI> > diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c > b/src/mesa/drivers/dri/i965/gen6_queryobj.c > index 130236e..f8b9bc3 100644 > --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c > +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c > @@ -109,6 +109,73 @@ write_xfb_primitives_written(struct brw_context *brw, > } > } > > +static inline const int > +pipeline_target_to_index(int target) > +{ > + if (target == GL_GEOMETRY_SHADER_INVOCATIONS) > + return MAX_PIPELINE_STATISTICS - 1; > + else > + return target - GL_VERTICES_SUBMITTED_ARB; > +} > + > +static void > +emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, > + int stream, int target, int idx) > +{ > + /* > + * There are 2 confusing parts to implementing the various target. The > first is > + * the distinction between vertices submitted and primitives submitted. > The > + * spec tries to clear this up, and luckily our hardware seems to > understand > + * it: > + * > + * (8) What stage the VERTICES_SUBMITTED_ARB and PRIMITIVES_SUBMITTED_ARB > + * belong to? What do they count? > + * > + * DISCUSSION: There is no separate pipeline stage introduced in the > + * specification that matches D3D's "input assembler" stage. While the > + * latest version of the GL specification mentions a "vertex puller" > stage > + * in the pipeline diagram, this stage does not have a corresponding > chapter > + * in the specification that introduces it. > + * > + * RESOLVED: Introduce VERTICES_SUBMITTED_ARB and > PRIMITIVES_SUBMITTED_ARB > + * in chapter 10, Vertex Specification and Drawing Command. They count > the > + * total number of vertices and primitives processed by the GL. > Including > + * multiple instances. > + * > + * The second confusion is the tessellation shader statistics. Our > hardware has > + * no statistics specific to the TE unit. Ideally we could have the HS > + * primitives for TESS_CONTROL_SHADER_PATCHES_ARB, and the DS invocations > as > + * the register for TESS_CONTROL_SHADER_PATCHES_ARB. Unfortunately we > don't > + * have HS primitives, we only have HS invocations. > + */ > + > + /* Everything except GEOMETRY_SHADER_INVOCATIONS can be kept in a simple > + * lookup table */ */ should be on it's own line (here and elsewhere). > + const uint32_t target_to_register[] = { static. Otherwise GCC emits code to initialize the array. > + IA_VERTICES_COUNT, /* VERTICES_SUBMITTED */ > + IA_PRIMITIVES_COUNT, /* PRIMITIVES_SUBMITTED */ > + VS_INVOCATION_COUNT, /* VERTEX_SHADER_INVOCATIONS */ > + 0, /* HS_INVOCATION_COUNT,*/ /* TESS_CONTROL_SHADER_PATCHES */ > + 0, /* DS_INVOCATION_COUNT,*/ /* TESS_EVALUATION_SHADER_INVOCATIONS */ > + GS_PRIMITIVES_COUNT, /* GEOMETRY_SHADER_PRIMITIVES_EMITTED */ > + PS_INVOCATION_COUNT, /* FRAGMENT_SHADER_INVOCATIONS */ > + 0, /* COMPUTE_SHADER_INVOCATIONS */ > + CL_INVOCATION_COUNT, /* CLIPPING_INPUT_PRIMITIVES */ > + CL_PRIMITIVES_COUNT, /* CLIPPING_OUTPUT_PRIMITIVES */ > + GS_INVOCATION_COUNT /* This one is special... */ > + }; > + STATIC_ASSERT(ARRAY_SIZE(target_to_register) == MAX_PIPELINE_STATISTICS); > + uint32_t reg = target_to_register[pipeline_target_to_index(target)]; > + assert(reg != 0); > + > + /* Emit a flush to make sure various parts of the pipeline are complete > and > + * we get an accurate value */ > + intel_batchbuffer_emit_mi_flush(brw); > + > + brw_store_register_mem64(brw, bo, reg, idx); > +} > + > + > /** > * Wait on the query object's BO and calculate the final result. > */ > @@ -182,9 +249,29 @@ gen6_queryobj_get_results(struct gl_context *ctx, > > case GL_PRIMITIVES_GENERATED: > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: > + case GL_VERTICES_SUBMITTED_ARB: > + case GL_PRIMITIVES_SUBMITTED_ARB: > + case GL_VERTEX_SHADER_INVOCATIONS_ARB: > + case GL_GEOMETRY_SHADER_INVOCATIONS: > + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: > + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > query->Base.Result = results[1] - results[0]; > break; > > + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: > + query->Base.Result = (results[1] - results[0]); > + /* HW reports this count 4X the actual value and therefore SW must > divide > + * the count by 4 for correct reporting. > + * XXX: Not in public docs? > + */ > + if (brw->gen >= 8 || brw->is_haswell) > + query->Base.Result /= 4; > + break; > + > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > default: > unreachable("Unrecognized query target in brw_queryobj_get_results()"); > } > @@ -251,6 +338,20 @@ gen6_begin_query(struct gl_context *ctx, struct > gl_query_object *q) > write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0); > break; > > + case GL_VERTICES_SUBMITTED_ARB: > + case GL_PRIMITIVES_SUBMITTED_ARB: > + case GL_VERTEX_SHADER_INVOCATIONS_ARB: > + case GL_GEOMETRY_SHADER_INVOCATIONS: > + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: > + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: > + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > + emit_pipeline_stat(brw, query->bo, query->Base.Stream, > query->Base.Target, 0); > + break; > + > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > default: > unreachable("Unrecognized query target in brw_begin_query()"); > } > @@ -289,6 +390,26 @@ gen6_end_query(struct gl_context *ctx, struct > gl_query_object *q) > write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1); > break; > > + case GL_VERTICES_SUBMITTED_ARB: > + case GL_PRIMITIVES_SUBMITTED_ARB: > + case GL_VERTEX_SHADER_INVOCATIONS_ARB: > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: > + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: > + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > + emit_pipeline_stat(brw, query->bo, > + query->Base.Stream, query->Base.Target, 1); > + break; > + case GL_GEOMETRY_SHADER_INVOCATIONS: > + /* GEOMETRY_SHADER_INVOCATIONS has a weirdly numbered target */ > + emit_pipeline_stat(brw, query->bo, query->Base.Stream, > + GL_VERTICES_SUBMITTED_ARB + MAX_PIPELINE_STATISTICS > - 1, Is this actually necessary? It looks like this parameter is eventually just passed to pipeline_target_to_index, which does the right thing with GL_GEOMETRY_SHADER_INVOCATIONS. > + 1); > + break; > + > default: > unreachable("Unrecognized query target in brw_end_query()"); > } > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index bbbb76f..a3645b9 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -186,6 +186,7 @@ intelInitExtensions(struct gl_context *ctx) > ctx->Extensions.ARB_map_buffer_range = true; > ctx->Extensions.ARB_occlusion_query = true; > ctx->Extensions.ARB_occlusion_query2 = true; > + ctx->Extensions.ARB_pipeline_statistics_query = true; > ctx->Extensions.ARB_point_sprite = true; > ctx->Extensions.ARB_seamless_cube_map = true; > ctx->Extensions.ARB_shader_bit_encoding = true; > diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h > index 4ec4b75..50e87fa 100644 > --- a/src/mesa/main/config.h > +++ b/src/mesa/main/config.h > @@ -300,6 +300,9 @@ > #define MAX_COMPUTE_IMAGE_UNIFORMS 8 > /*@}*/ > > +/** For GL_ARB_pipeline_statistics_query */ > +#define MAX_PIPELINE_STATISTICS 11 > + > /* > * Color channel component order > * > diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c > index 0df04c2..997c774 100644 > --- a/src/mesa/main/extensions.c > +++ b/src/mesa/main/extensions.c > @@ -133,6 +133,7 @@ static const struct extension extension_table[] = { > { "GL_ARB_multitexture", o(dummy_true), > GLL, 1998 }, > { "GL_ARB_occlusion_query2", o(ARB_occlusion_query2), > GL, 2003 }, > { "GL_ARB_occlusion_query", o(ARB_occlusion_query), > GLL, 2001 }, > + { "GL_ARB_pipeline_statistics_query", > o(ARB_pipeline_statistics_query), GL, 2014 }, > { "GL_ARB_pixel_buffer_object", > o(EXT_pixel_buffer_object), GL, 2004 }, > { "GL_ARB_point_parameters", o(EXT_point_parameters), > GLL, 1997 }, > { "GL_ARB_point_sprite", o(ARB_point_sprite), > GL, 2003 }, > diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h > index 7389baa..04a87be 100644 > --- a/src/mesa/main/mtypes.h > +++ b/src/mesa/main/mtypes.h > @@ -3053,6 +3053,9 @@ struct gl_query_state > /** GL_ARB_timer_query */ > struct gl_query_object *TimeElapsed; > > + /** GL_ARB_pipeline_statistics_query */ > + struct gl_query_object *pipeline_stats[MAX_PIPELINE_STATISTICS]; > + > GLenum CondRenderMode; > }; > > @@ -3439,6 +3442,17 @@ struct gl_constants > GLuint Timestamp; > GLuint PrimitivesGenerated; > GLuint PrimitivesWritten; > + GLuint VerticesSubmitted; > + GLuint PrimitivesSubmitted; > + GLuint VsInvocations; > + GLuint TessPatches; > + GLuint TessInvocations; > + GLuint GsInvocations; > + GLuint GsPrimitives; > + GLuint FsInvocations; > + GLuint ComputeInvocations; > + GLuint ClInPrimitives; > + GLuint ClOutPrimitives; > } QueryCounterBits; > > GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */ > @@ -3735,6 +3749,7 @@ struct gl_extensions > GLboolean ARB_map_buffer_range; > GLboolean ARB_occlusion_query; > GLboolean ARB_occlusion_query2; > + GLboolean ARB_pipeline_statistics_query; > GLboolean ARB_point_sprite; > GLboolean ARB_sample_shading; > GLboolean ARB_seamless_cube_map; > diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c > index 932359c..6cbacb3 100644 > --- a/src/mesa/main/queryobj.c > +++ b/src/mesa/main/queryobj.c > @@ -142,6 +142,14 @@ _mesa_init_query_object_functions(struct > dd_function_table *driver) > driver->CheckQuery = _mesa_check_query; > } > > +static struct gl_query_object ** > +get_pipe_stats_binding_point(struct gl_context *ctx, > + GLenum target) > +{ > + const int which = target - GL_VERTICES_SUBMITTED_ARB; > + assert(which < MAX_PIPELINE_STATISTICS); > + return &ctx->Query.pipeline_stats[which]; > +} > > /** > * Return pointer to the query object binding point for the given target and > @@ -183,6 +191,42 @@ get_query_binding_point(struct gl_context *ctx, GLenum > target, GLuint index) > return &ctx->Query.PrimitivesWritten[index]; > else > return NULL; > + > + case GL_VERTICES_SUBMITTED_ARB: > + case GL_PRIMITIVES_SUBMITTED_ARB: > + case GL_VERTEX_SHADER_INVOCATIONS_ARB: > + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: > + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > + if (ctx->Extensions.ARB_pipeline_statistics_query) > + return get_pipe_stats_binding_point(ctx, target); > + else > + return NULL; > + > + case GL_GEOMETRY_SHADER_INVOCATIONS: > + /* GL_GEOMETRY_SHADER_INVOCATIONS is defined in a non-sequential order > */ > + target = GL_VERTICES_SUBMITTED_ARB + MAX_PIPELINE_STATISTICS - 1; > + /* fallthrough */ > + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: > + if (ctx->Version >= 32 || ctx->Extensions.ARB_geometry_shader4) if (_mesa_has_geometry_shaders(ctx)) > + return get_pipe_stats_binding_point(ctx, target); > + else > + return NULL; > + > +#if 0 /* Not yet testable */ > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > + if (ctx->Version >= 40 || ctx->Extensions.ARB_tessellation_shader) > + return get_pipe_stats_binding_point(ctx, target, index); > + else > + return NULL; > + > + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > + if (ctx->Version >= 43 || ctx->Extensions.ARB_compute_shader) > + return get_pipe_stats_binding_point(ctx, target, index); > + return NULL; > +#endif > + > default: > return NULL; > } > @@ -553,6 +597,39 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, > GLenum pname, > case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: > *params = ctx->Const.QueryCounterBits.PrimitivesWritten; > break; > + case GL_VERTICES_SUBMITTED_ARB: > + *params = ctx->Const.QueryCounterBits.VerticesSubmitted; > + break; > + case GL_PRIMITIVES_SUBMITTED_ARB: > + *params = ctx->Const.QueryCounterBits.PrimitivesSubmitted; > + break; > + case GL_VERTEX_SHADER_INVOCATIONS_ARB: > + *params = ctx->Const.QueryCounterBits.VsInvocations; > + break; > + case GL_TESS_CONTROL_SHADER_PATCHES_ARB: > + *params = ctx->Const.QueryCounterBits.TessPatches; > + break; > + case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB: > + *params = ctx->Const.QueryCounterBits.TessInvocations; > + break; > + case GL_GEOMETRY_SHADER_INVOCATIONS: > + *params = ctx->Const.QueryCounterBits.GsInvocations; > + break; > + case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB: > + *params = ctx->Const.QueryCounterBits.GsPrimitives; > + break; > + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: > + *params = ctx->Const.QueryCounterBits.FsInvocations; > + break; > + case GL_COMPUTE_SHADER_INVOCATIONS_ARB: > + *params = ctx->Const.QueryCounterBits.ComputeInvocations; > + break; > + case GL_CLIPPING_INPUT_PRIMITIVES_ARB: > + *params = ctx->Const.QueryCounterBits.ClInPrimitives; > + break; > + case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB: > + *params = ctx->Const.QueryCounterBits.ClOutPrimitives; > + break; > default: > _mesa_problem(ctx, > "Unknown target in glGetQueryIndexediv(target = > %s)", > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev