What's the perf impact on a platform which actually needs this? I think there would be some further substantial gains to be had by giving this its own dirty bit -- piles of other atoms listen to BRW_NEW_VERTEX_PROGRAM, but don't care about the workaround bits.
On Thu, Dec 4, 2014 at 2:42 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > BRW_NEW_VERTICES is flagged every time we draw a primitive. Having > the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to > compute the VS program key and do a program cache lookup for every > single primitive. This is painfully expensive. > > The workaround bit computation is almost entirely based on the vertex > attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs. > The only thing it uses the VS program for is to see which VS inputs are > actually read. brw_merge_inputs() happens once per primitive, and can > safely look at the currently bound vertex program, as it doesn't change > in the middle of a draw. > > This patch moves the workaround bit computation to brw_merge_inputs(), > right after assigning brw->vb.inputs[i], and stores the previous WA bit > values in the context. If they've actually changed from the last draw > (which is uncommon), we signal that we need a new vertex program, > causing brw_vs_prog to compute a new key. > > Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS > state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should > work just as well, and already exists. > > Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my > Haswell GT3e system. I'm told that Baytrail shows similar gains. > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > --- > src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++ > src/mesa/drivers/dri/i965/brw_draw.c | 46 > +++++++++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_vs.c | 42 +++--------------------------- > 3 files changed, 58 insertions(+), 38 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index ec4b3dd..ace29df 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -1129,6 +1129,14 @@ struct brw_context > * the same VB packed over and over again. > */ > unsigned int start_vertex_bias; > + > + /** > + * Certain vertex attribute formats aren't natively handled by the > + * hardware and require special VS code to fix up their values. > + * > + * These bitfields indicate which workarounds are needed. > + */ > + uint8_t attrib_wa_flags[VERT_ATTRIB_MAX]; > } vb; > > struct { > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 4c2802a..d2ea085 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -46,6 +46,7 @@ > #include "brw_defines.h" > #include "brw_context.h" > #include "brw_state.h" > +#include "brw_vs.h" > > #include "intel_batchbuffer.h" > #include "intel_buffers.h" > @@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw, > static void brw_merge_inputs( struct brw_context *brw, > const struct gl_client_array *arrays[]) > { > + const struct gl_context *ctx = &brw->ctx; > GLuint i; > > for (i = 0; i < brw->vb.nr_buffers; i++) { > @@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw, > brw->vb.inputs[i].buffer = -1; > brw->vb.inputs[i].glarray = arrays[i]; > } > + > + if (brw->gen < 8 && !brw->is_haswell) { > + struct gl_program *vp = &ctx->VertexProgram._Current->Base; > + /* Prior to Haswell, the hardware can't natively support GL_FIXED or > + * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. > + */ > + for (i = 0; i < VERT_ATTRIB_MAX; i++) { > + if (!(vp->InputsRead & BITFIELD64_BIT(i))) > + continue; > + > + uint8_t wa_flags = 0; > + > + switch (brw->vb.inputs[i].glarray->Type) { > + > + case GL_FIXED: > + wa_flags = brw->vb.inputs[i].glarray->Size; > + break; > + > + case GL_INT_2_10_10_10_REV: > + wa_flags |= BRW_ATTRIB_WA_SIGN; > + /* fallthough */ > + > + case GL_UNSIGNED_INT_2_10_10_10_REV: > + if (brw->vb.inputs[i].glarray->Format == GL_BGRA) > + wa_flags |= BRW_ATTRIB_WA_BGRA; > + > + if (brw->vb.inputs[i].glarray->Normalized) > + wa_flags |= BRW_ATTRIB_WA_NORMALIZE; > + else if (!brw->vb.inputs[i].glarray->Integer) > + wa_flags |= BRW_ATTRIB_WA_SCALE; > + > + break; > + } > + > + if (brw->vb.attrib_wa_flags[i] != wa_flags) { > + brw->vb.attrib_wa_flags[i] = wa_flags; > + /* New workaround flags means we need to compute a new program > + * key and look up the associated vertex shader. Just flag that > + * we have a new shader program, since it triggers that effect. > + */ > + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; > + } > + } > + } > } > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_vs.c > b/src/mesa/drivers/dri/i965/brw_vs.c > index 2f628e5..6dbac1e 100644 > --- a/src/mesa/drivers/dri/i965/brw_vs.c > +++ b/src/mesa/drivers/dri/i965/brw_vs.c > @@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) > brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, > &key.base.tex); > > - /* BRW_NEW_VERTICES */ > - if (brw->gen < 8 && !brw->is_haswell) { > - /* Prior to Haswell, the hardware can't natively support GL_FIXED or > - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. > - */ > - for (i = 0; i < VERT_ATTRIB_MAX; i++) { > - if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i))) > - continue; > - > - uint8_t wa_flags = 0; > - > - switch (brw->vb.inputs[i].glarray->Type) { > - > - case GL_FIXED: > - wa_flags = brw->vb.inputs[i].glarray->Size; > - break; > - > - case GL_INT_2_10_10_10_REV: > - wa_flags |= BRW_ATTRIB_WA_SIGN; > - /* fallthough */ > - > - case GL_UNSIGNED_INT_2_10_10_10_REV: > - if (brw->vb.inputs[i].glarray->Format == GL_BGRA) > - wa_flags |= BRW_ATTRIB_WA_BGRA; > - > - if (brw->vb.inputs[i].glarray->Normalized) > - wa_flags |= BRW_ATTRIB_WA_NORMALIZE; > - else if (!brw->vb.inputs[i].glarray->Integer) > - wa_flags |= BRW_ATTRIB_WA_SCALE; > - > - break; > - } > - > - key.gl_attrib_wa_flags[i] = wa_flags; > - } > - } > + /* BRW_NEW_VERTEX_PROGRAM */ > + memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags, > + sizeof(brw->vb.attrib_wa_flags)); > > if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, > &key, sizeof(key), > @@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = { > _NEW_POLYGON | > _NEW_TEXTURE | > _NEW_TRANSFORM, > - .brw = BRW_NEW_VERTEX_PROGRAM | > - BRW_NEW_VERTICES, > + .brw = BRW_NEW_VERTEX_PROGRAM, > }, > .emit = brw_upload_vs_prog > }; > -- > 2.1.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev