On 30/03/18 13:52, Marek Olšák wrote:
On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarc...@itsqueeze.com <mailto:tarc...@itsqueeze.com>> wrote:

    For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
    setting the more accurate masks for some builtin uniforms for now as
    it causes some piglit regressions.
    ---
      src/gallium/drivers/radeonsi/si_shader.c     |  8 +++
      src/gallium/drivers/radeonsi/si_shader_nir.c | 82
    ++++++++++++++++++++++++++--
      2 files changed, 84 insertions(+), 6 deletions(-)

    diff --git a/src/gallium/drivers/radeonsi/si_shader.c
    b/src/gallium/drivers/radeonsi/si_shader.c
    index 62cb7ea7eb5..9a12f9ee8f2 100644
    --- a/src/gallium/drivers/radeonsi/si_shader.c
    +++ b/src/gallium/drivers/radeonsi/si_shader.c
    @@ -2377,8 +2377,16 @@ static LLVMValueRef
    load_const_buffer_desc(struct si_shader_context *ctx, int i)
      static LLVMValueRef load_ubo(struct ac_shader_abi *abi,
    LLVMValueRef index)
      {
             struct si_shader_context *ctx =
    si_shader_context_from_abi(abi);
    +       struct si_shader_selector *sel = ctx->shader->selector;
    +
             LLVMValueRef ptr = LLVMGetParam(ctx->main_fn,
    ctx->param_const_and_shader_buffers);

    +       if (sel->info.const_buffers_declared == 1 &&
    +           sel->info.shader_buffers_declared == 0 &&
    +           !(ctx->screen->info.chip_class == SI && HAVE_LLVM <
    0x0600)) {


You don't have to check SI and LLVM here. (because const_buffers_declared > 1)

Right. Will fix.


    +               return load_const_buffer_desc_fast_path(ctx);
    +       }
    +
             index = si_llvm_bound_index(ctx, index,
    ctx->num_const_buffers);
             index = LLVMBuildAdd(ctx->ac.builder, index,
                                  LLVMConstInt(ctx->i32,
    SI_NUM_SHADER_BUFFERS, 0), "");
    diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
    b/src/gallium/drivers/radeonsi/si_shader_nir.c
    index 52950668714..595f376f6a2 100644
    --- a/src/gallium/drivers/radeonsi/si_shader_nir.c
    +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
    @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct
    nir_shader *nir,

             info->num_outputs = num_outputs;

    +       struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
    +                                              _mesa_key_pointer_equal);
    +
    +       unsigned ubo_idx = 1;
             nir_foreach_variable(variable, &nir->uniforms) {
                     const struct glsl_type *type = variable->type;
                     enum glsl_base_type base_type =
                             glsl_get_base_type(glsl_without_array(type));
                     unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));

    +               /* Gather buffers declared bitmasks. Note: radeonsi
    doesn't
    +                * really use the mask (other than ubo_idx == 1 for
    regular
    +                * uniforms) its really only used for getting the
    buffer count
    +                * so we don't need to worry about the ordering.
    +                */
    +               if (variable->interface_type != NULL) {
    +                       if (variable->data.mode == nir_var_uniform) {
    +
    +                               unsigned block_size;
    +                               if (base_type != GLSL_TYPE_INTERFACE) {
    +                                       struct set_entry *entry =
+  _mesa_set_search(ubo_set, variable->interface_type);
    +
    +                                       /* Check if we have already
    processed
    +                                        * a member from this ubo.
    +                                        */
    +                                       if (entry)
    +                                               continue;
    +
    +                                       block_size = 1;
    +                               } else {
    +                                       block_size = aoa_size;
    +                               }
    +
    +                               info->const_buffers_declared |=
    u_bit_consecutive(ubo_idx, block_size);
    +                               ubo_idx += block_size;


Can you explain what this does?

Sets the mask for an array of blocks e.g.

uniform block {

vec4 a;
vec4 b;

} name[4];

Since each block array element is considered a separate buffer. I should probably rename block_size -> block_count



    +
    +                               _mesa_set_add(ubo_set,
    variable->interface_type);
    +                       }
    +
    +                       if (variable->data.mode ==
    nir_var_shader_storage) {
    +                               /* TODO: make this more accurate */
    +                               info->shader_buffers_declared =
    +                                       u_bit_consecutive(0,
    SI_NUM_SHADER_BUFFERS);

    +                       }
    +
    +                       continue;
    +               }
    +
                     /* We rely on the fact that
    nir_lower_samplers_as_deref has
                      * eliminated struct dereferences.
                      */
    -               if (base_type == GLSL_TYPE_SAMPLER)
    +               if (base_type == GLSL_TYPE_SAMPLER) {
                             info->samplers_declared |=
u_bit_consecutive(variable->data.binding, aoa_size);
    -               else if (base_type == GLSL_TYPE_IMAGE)
    +
    +                       if (variable->data.bindless)
    +                               info->const_buffers_declared |= 1;
    +               } else if (base_type == GLSL_TYPE_IMAGE) {
                             info->images_declared |=
u_bit_consecutive(variable->data.binding, aoa_size);
    +
    +                       if (variable->data.bindless)
    +                               info->const_buffers_declared |= 1;
    +               } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
    +                       if (strncmp(variable->name, "state.", 6) == 0) {
    +                               /* FIXME: figure out why piglit
    tests with builtin
    +                                * uniforms are failing without this.
    +                                */
    +                               info->const_buffers_declared =
    +                                       u_bit_consecutive(0,
    SI_NUM_CONST_BUFFERS);

    +                       } else {
    +                               info->const_buffers_declared |= 1;
    +                               info->const_file_max[0] +=
+  glsl_count_attribute_slots(type, false);
    +                       }
    +               }
             }

    +       _mesa_set_destroy(ubo_set, NULL);
    +
    +       /* This is the max index not max count so we adjust it here */
    +       if (info->const_file_max[0] != 0)
    +               info->const_file_max[0] -= 1;
    +
             info->num_written_clipdistance =
    nir->info.clip_distance_array_size;
             info->num_written_culldistance =
    nir->info.cull_distance_array_size;
             info->clipdist_writemask = u_bit_consecutive(0,
    info->num_written_clipdistance);
    @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader
    *nir,
             if (info->processor == PIPE_SHADER_FRAGMENT)
                     info->uses_kill = nir->info.fs.uses_discard;

    -       /* TODO make this more accurate */
    -       info->const_buffers_declared = u_bit_consecutive(0,
    SI_NUM_CONST_BUFFERS);
    -       info->shader_buffers_declared = u_bit_consecutive(0,
    SI_NUM_SHADER_BUFFERS);
    -
             func = (struct nir_function
    *)exec_list_get_head_const(&nir->functions);
             nir_foreach_block(block, func->impl) {
                     nir_foreach_instr(instr, block)
    @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader
    *nir,
      void
      si_lower_nir(struct si_shader_selector* sel)
      {
    +       /* Disable const buffer fast path for old LLVM versions */
    +       if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) {
    +               sel->info.const_buffers_declared =
    u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
    +               sel->info.shader_buffers_declared =
    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
    +       }


You can ignore what I said in patch 3.

You can do this instead:
if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0)
    const_buffers_declared |= 0x2;

This disables the optimization while still significantly decreases overhead in the descriptor upload path.

Makes sense will change. Thanks!.


Marek
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to