Half-ignore the first part of that... I thinko'd and assumed Baytrail worked the same as Haswell here, but obviously doesn't. I am interested in how it affects earlier gens, though.
On Thu, Dec 4, 2014 at 3:13 PM, Chris Forbes <chr...@ijw.co.nz> wrote: > What's the perf impact on a platform which actually needs this? > > I think there would be some further substantial gains to be had by > giving this its own dirty bit -- piles of other atoms listen to > BRW_NEW_VERTEX_PROGRAM, but don't care about the workaround bits. > > On Thu, Dec 4, 2014 at 2:42 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: >> BRW_NEW_VERTICES is flagged every time we draw a primitive. Having >> the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to >> compute the VS program key and do a program cache lookup for every >> single primitive. This is painfully expensive. >> >> The workaround bit computation is almost entirely based on the vertex >> attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs. >> The only thing it uses the VS program for is to see which VS inputs are >> actually read. brw_merge_inputs() happens once per primitive, and can >> safely look at the currently bound vertex program, as it doesn't change >> in the middle of a draw. >> >> This patch moves the workaround bit computation to brw_merge_inputs(), >> right after assigning brw->vb.inputs[i], and stores the previous WA bit >> values in the context. If they've actually changed from the last draw >> (which is uncommon), we signal that we need a new vertex program, >> causing brw_vs_prog to compute a new key. >> >> Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS >> state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should >> work just as well, and already exists. >> >> Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my >> Haswell GT3e system. I'm told that Baytrail shows similar gains. >> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> --- >> src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++ >> src/mesa/drivers/dri/i965/brw_draw.c | 46 >> +++++++++++++++++++++++++++++++++ >> src/mesa/drivers/dri/i965/brw_vs.c | 42 +++--------------------------- >> 3 files changed, 58 insertions(+), 38 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h >> b/src/mesa/drivers/dri/i965/brw_context.h >> index ec4b3dd..ace29df 100644 >> --- a/src/mesa/drivers/dri/i965/brw_context.h >> +++ b/src/mesa/drivers/dri/i965/brw_context.h >> @@ -1129,6 +1129,14 @@ struct brw_context >> * the same VB packed over and over again. >> */ >> unsigned int start_vertex_bias; >> + >> + /** >> + * Certain vertex attribute formats aren't natively handled by the >> + * hardware and require special VS code to fix up their values. >> + * >> + * These bitfields indicate which workarounds are needed. >> + */ >> + uint8_t attrib_wa_flags[VERT_ATTRIB_MAX]; >> } vb; >> >> struct { >> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c >> b/src/mesa/drivers/dri/i965/brw_draw.c >> index 4c2802a..d2ea085 100644 >> --- a/src/mesa/drivers/dri/i965/brw_draw.c >> +++ b/src/mesa/drivers/dri/i965/brw_draw.c >> @@ -46,6 +46,7 @@ >> #include "brw_defines.h" >> #include "brw_context.h" >> #include "brw_state.h" >> +#include "brw_vs.h" >> >> #include "intel_batchbuffer.h" >> #include "intel_buffers.h" >> @@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw, >> static void brw_merge_inputs( struct brw_context *brw, >> const struct gl_client_array *arrays[]) >> { >> + const struct gl_context *ctx = &brw->ctx; >> GLuint i; >> >> for (i = 0; i < brw->vb.nr_buffers; i++) { >> @@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw, >> brw->vb.inputs[i].buffer = -1; >> brw->vb.inputs[i].glarray = arrays[i]; >> } >> + >> + if (brw->gen < 8 && !brw->is_haswell) { >> + struct gl_program *vp = &ctx->VertexProgram._Current->Base; >> + /* Prior to Haswell, the hardware can't natively support GL_FIXED or >> + * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. >> + */ >> + for (i = 0; i < VERT_ATTRIB_MAX; i++) { >> + if (!(vp->InputsRead & BITFIELD64_BIT(i))) >> + continue; >> + >> + uint8_t wa_flags = 0; >> + >> + switch (brw->vb.inputs[i].glarray->Type) { >> + >> + case GL_FIXED: >> + wa_flags = brw->vb.inputs[i].glarray->Size; >> + break; >> + >> + case GL_INT_2_10_10_10_REV: >> + wa_flags |= BRW_ATTRIB_WA_SIGN; >> + /* fallthough */ >> + >> + case GL_UNSIGNED_INT_2_10_10_10_REV: >> + if (brw->vb.inputs[i].glarray->Format == GL_BGRA) >> + wa_flags |= BRW_ATTRIB_WA_BGRA; >> + >> + if (brw->vb.inputs[i].glarray->Normalized) >> + wa_flags |= BRW_ATTRIB_WA_NORMALIZE; >> + else if (!brw->vb.inputs[i].glarray->Integer) >> + wa_flags |= BRW_ATTRIB_WA_SCALE; >> + >> + break; >> + } >> + >> + if (brw->vb.attrib_wa_flags[i] != wa_flags) { >> + brw->vb.attrib_wa_flags[i] = wa_flags; >> + /* New workaround flags means we need to compute a new program >> + * key and look up the associated vertex shader. Just flag that >> + * we have a new shader program, since it triggers that effect. >> + */ >> + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; >> + } >> + } >> + } >> } >> >> /** >> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c >> b/src/mesa/drivers/dri/i965/brw_vs.c >> index 2f628e5..6dbac1e 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vs.c >> +++ b/src/mesa/drivers/dri/i965/brw_vs.c >> @@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) >> brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, >> &key.base.tex); >> >> - /* BRW_NEW_VERTICES */ >> - if (brw->gen < 8 && !brw->is_haswell) { >> - /* Prior to Haswell, the hardware can't natively support GL_FIXED or >> - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. >> - */ >> - for (i = 0; i < VERT_ATTRIB_MAX; i++) { >> - if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i))) >> - continue; >> - >> - uint8_t wa_flags = 0; >> - >> - switch (brw->vb.inputs[i].glarray->Type) { >> - >> - case GL_FIXED: >> - wa_flags = brw->vb.inputs[i].glarray->Size; >> - break; >> - >> - case GL_INT_2_10_10_10_REV: >> - wa_flags |= BRW_ATTRIB_WA_SIGN; >> - /* fallthough */ >> - >> - case GL_UNSIGNED_INT_2_10_10_10_REV: >> - if (brw->vb.inputs[i].glarray->Format == GL_BGRA) >> - wa_flags |= BRW_ATTRIB_WA_BGRA; >> - >> - if (brw->vb.inputs[i].glarray->Normalized) >> - wa_flags |= BRW_ATTRIB_WA_NORMALIZE; >> - else if (!brw->vb.inputs[i].glarray->Integer) >> - wa_flags |= BRW_ATTRIB_WA_SCALE; >> - >> - break; >> - } >> - >> - key.gl_attrib_wa_flags[i] = wa_flags; >> - } >> - } >> + /* BRW_NEW_VERTEX_PROGRAM */ >> + memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags, >> + sizeof(brw->vb.attrib_wa_flags)); >> >> if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG, >> &key, sizeof(key), >> @@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = { >> _NEW_POLYGON | >> _NEW_TEXTURE | >> _NEW_TRANSFORM, >> - .brw = BRW_NEW_VERTEX_PROGRAM | >> - BRW_NEW_VERTICES, >> + .brw = BRW_NEW_VERTEX_PROGRAM, >> }, >> .emit = brw_upload_vs_prog >> }; >> -- >> 2.1.3 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev