On 2015-09-24 22:20:40, Kenneth Graunke wrote: > Broadwell's 3DSTATE_GS contains new "Static Output" and "Static Vertex > Count" fields, which control a new optimization. Normally, geometry > shaders can output arbitrary numbers of vertices, which means that > resource allocation has to be done on the fly. However, if the number > of vertices is statically known, the hardware can pre-allocate resources > up front, which is more efficient. > > Thanks to the new NIR GS intrinsics, this is easy. We just call the > function introduced in the previous commit to get the vertex count. > If it obtains a count, we stop emitting the extra 32-bit "Vertex Count" > field in the VUE, and instead fill out the 3DSTATE_GS fields. > > Improves performance of Gl32GSCloth by 5.16347% +/- 0.12611% (n=91) > on my Lenovo X250 laptop (Broadwell GT2) at 1024x768. > > shader-db statistics for geometry shaders only: > > total instructions in shared programs: 3227 -> 3207 (-0.62%) > instructions in affected programs: 242 -> 222 (-8.26%) > helped: 10 > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_context.h | 5 +++++ > src/mesa/drivers/dri/i965/brw_defines.h | 5 +++++ > src/mesa/drivers/dri/i965/brw_gs.c | 5 +++++ > src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 11 +++++++---- > src/mesa/drivers/dri/i965/gen8_gs_state.c | 6 ++++++ > 5 files changed, 28 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index b05b8bd..5c31ba4 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -782,6 +782,11 @@ struct brw_gs_prog_data > > bool include_primitive_id; > > + /** > + * The number of vertices emitted, if constant - otherwise -1. 
> + */ > + int static_vertex_count; > + > int invocations; > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index f9d8d1b..6d94a6f 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1960,6 +1960,11 @@ enum brw_message_target { > # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) > # define GEN6_GS_ENABLE (1 << 15) > > +/* Gen8+ DW8 */ > +# define GEN8_GS_STATIC_OUTPUT (1 << 30) > +# define GEN8_GS_STATIC_VERTEX_COUNT_SHIFT 16 > +# define GEN8_GS_STATIC_VERTEX_COUNT_MASK INTEL_MASK(26, 16) > + > /* Gen8+ DW9 */ > # define GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT 21 > # define GEN8_GS_URB_OUTPUT_LENGTH_SHIFT 16 > diff --git a/src/mesa/drivers/dri/i965/brw_gs.c > b/src/mesa/drivers/dri/i965/brw_gs.c > index 16ea684..111cf93 100644 > --- a/src/mesa/drivers/dri/i965/brw_gs.c > +++ b/src/mesa/drivers/dri/i965/brw_gs.c > @@ -73,6 +73,11 @@ brw_codegen_gs_prog(struct brw_context *brw, > c.prog_data.base.base.nr_params = param_count; > c.prog_data.base.base.nr_image_params = gs->NumImages; > > + if (brw->gen >= 8) { > + c.prog_data.static_vertex_count = > + nir_gs_count_vertices(gp->program.Base.nir); > + }
It looks like static_vertex_count will always be 0 for gen < 8 which I guess is an invalid value. It looks like the code that uses it always checks gen >= 8, so it should be fine. Series Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > if (brw->gen >= 7) { > if (gp->program.OutputType == GL_POINTS) { > /* When the output type is points, the geometry shader may output > data > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > index ff5bd98..acf0501 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp > @@ -234,17 +234,20 @@ vec4_gs_visitor::emit_thread_end() > */ > int base_mrf = 1; > > + bool static_vertex_count = c->prog_data.static_vertex_count != -1; > + > current_annotation = "thread end"; > dst_reg mrf_reg(MRF, base_mrf); > src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); > vec4_instruction *inst = emit(MOV(mrf_reg, r0)); > inst->force_writemask_all = true; > - emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); > + if (devinfo->gen < 8 || !static_vertex_count) > + emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); > if (INTEL_DEBUG & DEBUG_SHADER_TIME) > emit_shader_time_end(); > inst = emit(GS_OPCODE_THREAD_END); > inst->base_mrf = base_mrf; > - inst->mlen = devinfo->gen >= 8 ? 2 : 1; > + inst->mlen = devinfo->gen >= 8 && !static_vertex_count ? 2 : 1; > } > > > @@ -284,7 +287,7 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) > /* We need to increment Global Offset by 1 to make room for Broadwell's > * extra "Vertex Count" payload at the beginning of the URB entry. > */ > - if (devinfo->gen >= 8) > + if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1) > inst->offset++; > > inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; > @@ -421,7 +424,7 @@ vec4_gs_visitor::emit_control_data_bits() > * URB entry. 
Since this is an OWord message, Global Offset is counted > * in 128-bit units, so we must set it to 2. > */ > - if (devinfo->gen >= 8) > + if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1) > inst->offset = 2; > inst->base_mrf = base_mrf; > inst->mlen = 2; > diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c > b/src/mesa/drivers/dri/i965/gen8_gs_state.c > index 81bd3b2..4195f4c 100644 > --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c > @@ -90,6 +90,12 @@ gen8_upload_gs_state(struct brw_context *brw) > uint32_t dw8 = brw->gs.prog_data->control_data_format << > HSW_GS_CONTROL_DATA_FORMAT_SHIFT; > > + if (brw->gs.prog_data->static_vertex_count != -1) { > + dw8 |= GEN8_GS_STATIC_OUTPUT | > + SET_FIELD(brw->gs.prog_data->static_vertex_count, > + GEN8_GS_STATIC_VERTEX_COUNT); > + } > + > if (brw->gen < 9) > dw7 |= (brw->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT; > else > -- > 2.5.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev