fs: push first double-based uniforms in push constant buffer

Samuel Iglesias Gonsálvez Sun, 08 May 2016 22:07:59 -0700


On 07/05/16 09:22, Jordan Justen wrote:
> On 2016-05-05 23:56:08, Samuel Iglesias Gonsálvez wrote:
>> When there is a mix of definitions of uniforms with 32-bit or 64-bit
>> data type sizes, the driver ends up doing misaligned access to double
>> based variables in the push constant buffer.
>>
>> To fix this, this patch pushes first all the 64-bit variables and
>> then the rest. Then, all the variables would be aligned to
>> its data type size.
>>
>> Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp | 113 
>> +++++++++++++++++++++++++----------
>>  1 file changed, 83 insertions(+), 30 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 65aa3c7..7eaf1b4 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -39,6 +39,7 @@
>>  #include "brw_program.h"
>>  #include "brw_dead_control_flow.h"
>>  #include "compiler/glsl_types.h"
>> +#include "program/prog_parameter.h"
>>  
>>  using namespace brw;
>>  
>> @@ -2004,6 +2005,45 @@ fs_visitor::compact_virtual_grfs()
>>     return progress;
>>  }
>>  
>> +static void
>> +set_push_pull_constant_loc(unsigned uniform, int &chunk_start, bool 
>> contiguous,
>> +                           int *push_constant_loc, int *pull_constant_loc,
>> +                           unsigned &num_push_constants,
>> +                           unsigned &num_pull_constants,
>> +                           const unsigned max_push_components,
>> +                           const unsigned max_chunk_size,
>> +                           struct brw_stage_prog_data *stage_prog_data)
>> +{
>> +   /* This is the first live uniform in the chunk */
>> +   if (chunk_start < 0)
>> +      chunk_start = uniform;
>> +
>> +   /* If this element does not need to be contiguous with the next, we
>> +    * split at this point and everthing between chunk_start and u forms a
>> +    * single chunk.
>> +    */
>> +   if (!contiguous) {
>> +      unsigned chunk_size = uniform - chunk_start + 1;
>> +
>> +      /* Decide whether we should push or pull this parameter.  In the
>> +       * Vulkan driver, push constants are explicitly exposed via the API
>> +       * so we push everything.  In GL, we only push small arrays.
>> +       */
>> +      if (stage_prog_data->pull_param == NULL ||
>> +          (num_push_constants + chunk_size <= max_push_components &&
>> +           chunk_size <= max_chunk_size)) {
>> +         assert(num_push_constants + chunk_size <= max_push_components);
>> +         for (unsigned j = chunk_start; j <= uniform; j++)
>> +            push_constant_loc[j] = num_push_constants++;
>> +      } else {
>> +         for (unsigned j = chunk_start; j <= uniform; j++)
>> +            pull_constant_loc[j] = num_pull_constants++;
>> +      }
>> +
>> +      chunk_start = -1;
>> +   }
>> +}
>> +
>>  /**
>>   * Assign UNIFORM file registers to either push constants or pull constants.
>>   *
>> @@ -2022,6 +2062,8 @@ fs_visitor::assign_constant_locations()
>>  
>>     bool is_live[uniforms];
>>     memset(is_live, 0, sizeof(is_live));
>> +   bool is_live_64bit[uniforms];
>> +   memset(is_live_64bit, 0, sizeof(is_live_64bit));
>>  
>>     /* For each uniform slot, a value of true indicates that the given slot 
>> and
>>      * the next slot must remain contiguous.  This is used to keep us from
>> @@ -2054,14 +2096,21 @@ fs_visitor::assign_constant_locations()
>>              for (unsigned j = constant_nr; j < last; j++) {
>>                 is_live[j] = true;
>>                 contiguous[j] = true;
>> +               if (type_sz(inst->src[i].type) == 8) {
>> +                  is_live_64bit[j] = true;
>> +               }
>>              }
>>              is_live[last] = true;
>>           } else {
>>              if (constant_nr >= 0 && constant_nr < (int) uniforms) {
>>                 int regs_read = inst->components_read(i) *
>>                    type_sz(inst->src[i].type) / 4;
>> -               for (int j = 0; j < regs_read; j++)
>> +               for (int j = 0; j < regs_read; j++) {
>>                    is_live[constant_nr + j] = true;
>> +                  if (type_sz(inst->src[i].type) == 8) {
>> +                     is_live_64bit[constant_nr + j] = true;
>> +                  }
>> +               }
>>              }
>>           }
>>        }
>> @@ -2090,43 +2139,46 @@ fs_visitor::assign_constant_locations()
>>     pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
>>  
>>     int chunk_start = -1;
>> +
>> +
>> +   /* First push 64-bit uniforms */
> 
> Maybe add "to ensure they are properly aligned"?
>


Yeah, I will add it.

>>     for (unsigned u = 0; u < uniforms; u++) {
>> -      push_constant_loc[u] = -1;
>> +      if (!is_live[u] || !is_live_64bit[u])
>> +         continue;
>> +
>>        pull_constant_loc[u] = -1;
>> +      push_constant_loc[u] = -1;
>>  
>> -      if (!is_live[u])
>> -         continue;
>> +      set_push_pull_constant_loc(u, chunk_start, contiguous[u],
>> +                                 push_constant_loc, pull_constant_loc,
>> +                                 num_push_constants, num_pull_constants,
>> +                                 max_push_components, max_chunk_size,
>> +                                 stage_prog_data);
>>  
>> -      /* This is the first live uniform in the chunk */
>> -      if (chunk_start < 0)
>> -         chunk_start = u;
>> +   }
>>  
>> -      /* If this element does not need to be contiguous with the next, we
>> -       * split at this point and everthing between chunk_start and u forms a
>> -       * single chunk.
>> -       */
>> -      if (!contiguous[u]) {
>> -         unsigned chunk_size = u - chunk_start + 1;
>> +   /* Then push the rest of uniforms */
>> +   for (unsigned u = 0; u < uniforms; u++) {
>> +      if (!is_live[u] || is_live_64bit[u])
>> +         continue;
>>  
>> -         /* Decide whether we should push or pull this parameter.  In the
>> -          * Vulkan driver, push constants are explicitly exposed via the API
>> -          * so we push everything.  In GL, we only push small arrays.
>> -          */
>> -         if (stage_prog_data->pull_param == NULL ||
>> -             (num_push_constants + chunk_size <= max_push_components &&
>> -              chunk_size <= max_chunk_size)) {
>> -            assert(num_push_constants + chunk_size <= max_push_components);
>> -            for (unsigned j = chunk_start; j <= u; j++)
>> -               push_constant_loc[j] = num_push_constants++;
>> -         } else {
>> -            for (unsigned j = chunk_start; j <= u; j++)
>> -               pull_constant_loc[j] = num_pull_constants++;
>> -         }
>> +      pull_constant_loc[u] = -1;
>> +      push_constant_loc[u] = -1;
>>  
>> -         chunk_start = -1;
>> -      }
>> +      set_push_pull_constant_loc(u, chunk_start, contiguous[u],
>> +                                 push_constant_loc, pull_constant_loc,
>> +                                 num_push_constants, num_pull_constants,
>> +                                 max_push_components, max_chunk_size,
>> +                                 stage_prog_data);
>>     }
>>  
>> +   /* As the uniforms are going to be reordered, take the data from a 
>> temporal
> 
> temporary
> 

OK

>> +    * copy of the original param[].
>> +    */
>> +   gl_constant_value **param = rzalloc_array(mem_ctx, gl_constant_value*,
>> +                                             stage_prog_data->nr_params);
> 
> Since we always freeing it below, should we use NULL instead of
> mem_ctx?
> 

Yes, at the end using mem_ctx doesn't give any advantage as we are
freeing it below. I'll change it.

> I think don't need the 'z' version since we use memcpy next.

Right!

> 
> Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com>
> 

OK, thanks a lot for the review!

Sam

>> +   memcpy(param, stage_prog_data->param,
>> +          sizeof(gl_constant_value*) * stage_prog_data->nr_params);
>>     stage_prog_data->nr_params = num_push_constants;
>>     stage_prog_data->nr_pull_params = num_pull_constants;
>>  
>> @@ -2139,7 +2191,7 @@ fs_visitor::assign_constant_locations()
>>      * having to make a copy.
>>      */
>>     for (unsigned int i = 0; i < uniforms; i++) {
>> -      const gl_constant_value *value = stage_prog_data->param[i];
>> +      const gl_constant_value *value = param[i];
>>  
>>        if (pull_constant_loc[i] != -1) {
>>           stage_prog_data->pull_param[pull_constant_loc[i]] = value;
>> @@ -2147,6 +2199,7 @@ fs_visitor::assign_constant_locations()
>>           stage_prog_data->param[push_constant_loc[i]] = value;
>>        }
>>     }
>> +   ralloc_free(param);
>>  }
>>  
>>  /**
>> -- 
>> 2.5.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 56/59] i965/fs: push first double-based uniforms in push constant buffer

Reply via email to