On Tue, Mar 27, 2018 at 12:19 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote:
> For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip > setting the more accurate masks for some builtin uniforms for now as > it causes some piglit regressions. > --- > src/gallium/drivers/radeonsi/si_shader.c | 8 +++ > src/gallium/drivers/radeonsi/si_shader_nir.c | 82 > ++++++++++++++++++++++++++-- > 2 files changed, 84 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 62cb7ea7eb5..9a12f9ee8f2 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -2377,8 +2377,16 @@ static LLVMValueRef load_const_buffer_desc(struct > si_shader_context *ctx, int i) > static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef > index) > { > struct si_shader_context *ctx = si_shader_context_from_abi(abi); > + struct si_shader_selector *sel = ctx->shader->selector; > + > LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, > ctx->param_const_and_shader_buffers); > > + if (sel->info.const_buffers_declared == 1 && > + sel->info.shader_buffers_declared == 0 && > + !(ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600)) { > You don't have to check SI and LLVM here. (because const_buffers_declared > 1) > + return load_const_buffer_desc_fast_path(ctx); > + } > + > index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); > index = LLVMBuildAdd(ctx->ac.builder, index, > LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, > 0), ""); > diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c > b/src/gallium/drivers/radeonsi/si_shader_nir.c > index 52950668714..595f376f6a2 100644 > --- a/src/gallium/drivers/radeonsi/si_shader_nir.c > +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c > @@ -611,23 +611,91 @@ void si_nir_scan_shader(const struct nir_shader *nir, > > info->num_outputs = num_outputs; > > + struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer, > + _mesa_key_pointer_equal); > + > + unsigned ubo_idx = 1; > nir_foreach_variable(variable, &nir->uniforms) { > const struct glsl_type *type = variable->type; > enum glsl_base_type base_type = > glsl_get_base_type(glsl_without_array(type)); > unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type)); > > + /* Gather buffers declared bitmasks. Note: radeonsi doesn't > + * really use the mask (other than ubo_idx == 1 for regular > + * uniforms) its really only used for getting the buffer > count > + * so we don't need to worry about the ordering. > + */ > + if (variable->interface_type != NULL) { > + if (variable->data.mode == nir_var_uniform) { > + > + unsigned block_size; > + if (base_type != GLSL_TYPE_INTERFACE) { > + struct set_entry *entry = > + _mesa_set_search(ubo_set, > variable->interface_type); > + > + /* Check if we have already > processed > + * a member from this ubo. > + */ > + if (entry) > + continue; > + > + block_size = 1; > + } else { > + block_size = aoa_size; > + } > + > + info->const_buffers_declared |= > u_bit_consecutive(ubo_idx, block_size); > + ubo_idx += block_size; > Can you explain what this does? > + > + _mesa_set_add(ubo_set, > variable->interface_type); > + } > + > + if (variable->data.mode == nir_var_shader_storage) > { > + /* TODO: make this more accurate */ > + info->shader_buffers_declared = > + u_bit_consecutive(0, > SI_NUM_SHADER_BUFFERS); > + } > + > + continue; > + } > + > /* We rely on the fact that nir_lower_samplers_as_deref has > * eliminated struct dereferences. > */ > - if (base_type == GLSL_TYPE_SAMPLER) > + if (base_type == GLSL_TYPE_SAMPLER) { > info->samplers_declared |= > u_bit_consecutive(variable->data.binding, > aoa_size); > - else if (base_type == GLSL_TYPE_IMAGE) > + > + if (variable->data.bindless) > + info->const_buffers_declared |= 1; > + } else if (base_type == GLSL_TYPE_IMAGE) { > info->images_declared |= > u_bit_consecutive(variable->data.binding, > aoa_size); > + > + if (variable->data.bindless) > + info->const_buffers_declared |= 1; > + } else if (base_type != GLSL_TYPE_ATOMIC_UINT) { > + if (strncmp(variable->name, "state.", 6) == 0) { > + /* FIXME: figure out why piglit tests with > builtin > + * uniforms are failing without this. > + */ > + info->const_buffers_declared = > + u_bit_consecutive(0, > SI_NUM_CONST_BUFFERS); > + } else { > + info->const_buffers_declared |= 1; > + info->const_file_max[0] += > + glsl_count_attribute_slots(type, > false); > + } > + } > } > > + _mesa_set_destroy(ubo_set, NULL); > + > + /* This is the max index not max count so we adjust it here */ > + if (info->const_file_max[0] != 0) > + info->const_file_max[0] -= 1; > + > info->num_written_clipdistance = nir->info.clip_distance_array_ > size; > info->num_written_culldistance = nir->info.cull_distance_array_ > size; > info->clipdist_writemask = u_bit_consecutive(0, info->num_written_ > clipdistance); > @@ -636,10 +704,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, > if (info->processor == PIPE_SHADER_FRAGMENT) > info->uses_kill = nir->info.fs.uses_discard; > > - /* TODO make this more accurate */ > - info->const_buffers_declared = u_bit_consecutive(0, > SI_NUM_CONST_BUFFERS); > - info->shader_buffers_declared = u_bit_consecutive(0, > SI_NUM_SHADER_BUFFERS); > - > func = (struct nir_function *)exec_list_get_head_const(& > nir->functions); > nir_foreach_block(block, func->impl) { > nir_foreach_instr(instr, block) > @@ -654,6 +718,12 @@ void si_nir_scan_shader(const struct nir_shader *nir, > void > si_lower_nir(struct si_shader_selector* sel) > { > + /* Disable const buffer fast path for old LLVM versions */ > + if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600) { > + sel->info.const_buffers_declared = u_bit_consecutive(0, > SI_NUM_CONST_BUFFERS); > + sel->info.shader_buffers_declared = u_bit_consecutive(0, > SI_NUM_SHADER_BUFFERS); > + } > You can ignore what I said in patch 3. You can do this instead: if (.... && const_buffers_declared == 1 && shader_buffers_declared = 0) const_buffers_declared |= 0x2; This disables the optimization while still significantly decreases overhead in the descriptor upload path. Marek
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev