Re: [Mesa-dev] [PATCH] i965: Compute VS attribute WA bits earlier and check if they changed.

Chris Forbes Wed, 03 Dec 2014 18:17:49 -0800

Half-ignore the first part of that... I thinko'd and assumed Baytrail
worked the same as Haswell here, but obviously doesn't. I am
interested in how it affects earlier gens, though.


On Thu, Dec 4, 2014 at 3:13 PM, Chris Forbes <chr...@ijw.co.nz> wrote:
> What's the perf impact on a platform which actually needs this?
>
> I think there would be some further substantial gains to be had by
> giving this its own dirty bit -- piles of other atoms listen to
> BRW_NEW_VERTEX_PROGRAM, but don't care about the workaround bits.
>
> On Thu, Dec 4, 2014 at 2:42 PM, Kenneth Graunke <kenn...@whitecape.org> wrote:
>> BRW_NEW_VERTICES is flagged every time we draw a primitive.  Having
>> the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to
>> compute the VS program key and do a program cache lookup for every
>> single primitive.  This is painfully expensive.
>>
>> The workaround bit computation is almost entirely based on the vertex
>> attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs.
>> The only thing it uses the VS program for is to see which VS inputs are
>> actually read.  brw_merge_inputs() happens once per primitive, and can
>> safely look at the currently bound vertex program, as it doesn't change
>> in the middle of a draw.
>>
>> This patch moves the workaround bit computation to brw_merge_inputs(),
>> right after assigning brw->vb.inputs[i], and stores the previous WA bit
>> values in the context.  If they've actually changed from the last draw
>> (which is uncommon), we signal that we need a new vertex program,
>> causing brw_vs_prog to compute a new key.
>>
>> Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS
>> state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should
>> work just as well, and already exists.
>>
>> Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my
>> Haswell GT3e system.  I'm told that Baytrail shows similar gains.
>>
>> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h |  8 ++++++
>>  src/mesa/drivers/dri/i965/brw_draw.c    | 46 
>> +++++++++++++++++++++++++++++++++
>>  src/mesa/drivers/dri/i965/brw_vs.c      | 42 +++---------------------------
>>  3 files changed, 58 insertions(+), 38 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index ec4b3dd..ace29df 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1129,6 +1129,14 @@ struct brw_context
>>         * the same VB packed over and over again.
>>         */
>>        unsigned int start_vertex_bias;
>> +
>> +      /**
>> +       * Certain vertex attribute formats aren't natively handled by the
>> +       * hardware and require special VS code to fix up their values.
>> +       *
>> +       * These bitfields indicate which workarounds are needed.
>> +       */
>> +      uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
>>     } vb;
>>
>>     struct {
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
>> b/src/mesa/drivers/dri/i965/brw_draw.c
>> index 4c2802a..d2ea085 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -46,6 +46,7 @@
>>  #include "brw_defines.h"
>>  #include "brw_context.h"
>>  #include "brw_state.h"
>> +#include "brw_vs.h"
>>
>>  #include "intel_batchbuffer.h"
>>  #include "intel_buffers.h"
>> @@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw,
>>  static void brw_merge_inputs( struct brw_context *brw,
>>                        const struct gl_client_array *arrays[])
>>  {
>> +   const struct gl_context *ctx = &brw->ctx;
>>     GLuint i;
>>
>>     for (i = 0; i < brw->vb.nr_buffers; i++) {
>> @@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw,
>>        brw->vb.inputs[i].buffer = -1;
>>        brw->vb.inputs[i].glarray = arrays[i];
>>     }
>> +
>> +   if (brw->gen < 8 && !brw->is_haswell) {
>> +      struct gl_program *vp = &ctx->VertexProgram._Current->Base;
>> +      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
>> +       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
>> +       */
>> +      for (i = 0; i < VERT_ATTRIB_MAX; i++) {
>> +         if (!(vp->InputsRead & BITFIELD64_BIT(i)))
>> +            continue;
>> +
>> +         uint8_t wa_flags = 0;
>> +
>> +         switch (brw->vb.inputs[i].glarray->Type) {
>> +
>> +         case GL_FIXED:
>> +            wa_flags = brw->vb.inputs[i].glarray->Size;
>> +            break;
>> +
>> +         case GL_INT_2_10_10_10_REV:
>> +            wa_flags |= BRW_ATTRIB_WA_SIGN;
>> +            /* fallthough */
>> +
>> +         case GL_UNSIGNED_INT_2_10_10_10_REV:
>> +            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
>> +               wa_flags |= BRW_ATTRIB_WA_BGRA;
>> +
>> +            if (brw->vb.inputs[i].glarray->Normalized)
>> +               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
>> +            else if (!brw->vb.inputs[i].glarray->Integer)
>> +               wa_flags |= BRW_ATTRIB_WA_SCALE;
>> +
>> +            break;
>> +         }
>> +
>> +         if (brw->vb.attrib_wa_flags[i] != wa_flags) {
>> +            brw->vb.attrib_wa_flags[i] = wa_flags;
>> +            /* New workaround flags means we need to compute a new program
>> +             * key and look up the associated vertex shader.  Just flag that
>> +             * we have a new shader program, since it triggers that effect.
>> +             */
>> +            brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
>> +         }
>> +      }
>> +   }
>>  }
>>
>>  /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
>> b/src/mesa/drivers/dri/i965/brw_vs.c
>> index 2f628e5..6dbac1e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vs.c
>> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
>> @@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>>     brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
>>                                        &key.base.tex);
>>
>> -   /* BRW_NEW_VERTICES */
>> -   if (brw->gen < 8 && !brw->is_haswell) {
>> -      /* Prior to Haswell, the hardware can't natively support GL_FIXED or
>> -       * 2_10_10_10_REV vertex formats.  Set appropriate workaround flags.
>> -       */
>> -      for (i = 0; i < VERT_ATTRIB_MAX; i++) {
>> -         if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
>> -            continue;
>> -
>> -         uint8_t wa_flags = 0;
>> -
>> -         switch (brw->vb.inputs[i].glarray->Type) {
>> -
>> -         case GL_FIXED:
>> -            wa_flags = brw->vb.inputs[i].glarray->Size;
>> -            break;
>> -
>> -         case GL_INT_2_10_10_10_REV:
>> -            wa_flags |= BRW_ATTRIB_WA_SIGN;
>> -            /* fallthough */
>> -
>> -         case GL_UNSIGNED_INT_2_10_10_10_REV:
>> -            if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
>> -               wa_flags |= BRW_ATTRIB_WA_BGRA;
>> -
>> -            if (brw->vb.inputs[i].glarray->Normalized)
>> -               wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
>> -            else if (!brw->vb.inputs[i].glarray->Integer)
>> -               wa_flags |= BRW_ATTRIB_WA_SCALE;
>> -
>> -            break;
>> -         }
>> -
>> -         key.gl_attrib_wa_flags[i] = wa_flags;
>> -      }
>> -   }
>> +   /* BRW_NEW_VERTEX_PROGRAM */
>> +   memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
>> +          sizeof(brw->vb.attrib_wa_flags));
>>
>>     if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
>>                          &key, sizeof(key),
>> @@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = {
>>                 _NEW_POLYGON |
>>                 _NEW_TEXTURE |
>>                 _NEW_TRANSFORM,
>> -      .brw   = BRW_NEW_VERTEX_PROGRAM |
>> -               BRW_NEW_VERTICES,
>> +      .brw   = BRW_NEW_VERTEX_PROGRAM,
>>     },
>>     .emit = brw_upload_vs_prog
>>  };
>> --
>> 2.1.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Compute VS attribute WA bits earlier and check if they changed.

Reply via email to