From: Dave Airlie <airl...@redhat.com>

Instead of putting the push constants into the upload buffer,
if we have space in the user SGPRs we can pass the per-stage
constants to the shaders directly.

This saves a few reads from memory in the meta shaders. We
should also be able to inline other objects, such as
descriptors, the same way.
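
To illustrate the idea outside the driver, here is a small standalone C
sketch of the check visit_load_push_constant now does: a load at a
compile-time-constant offset is served from the inline SGPR arguments when
every dword it touches is covered by the per-shader inline mask, and falls
back to the memory path otherwise. The names (inline_info, lookup_inline)
are made up for the example and are not part of the patch:

/* Illustrative sketch only: inline_info and lookup_inline are invented for
 * this explanation and are not part of the patch or of RADV. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct inline_info {
        uint32_t base;  /* first 32-bit push-constant slot passed in SGPRs */
        uint64_t mask;  /* bit i set => slot (base + i) is available inline */
};

/* Returns true and the SGPR-relative slot when all num_components dwords
 * starting at byte_offset are covered by the inline mask. */
static bool lookup_inline(const struct inline_info *info,
                          uint32_t byte_offset, uint32_t num_components,
                          uint32_t *slot)
{
        uint32_t idx = byte_offset / 4;

        if (idx < info->base)
                return false;
        idx -= info->base;
        if (idx + num_components > 64)
                return false;

        uint64_t bits = ((1ULL << num_components) - 1) << idx;
        if ((bits & info->mask) != bits)
                return false;   /* not fully inline: take the memory path */

        *slot = idx;
        return true;
}

int main(void)
{
        /* Pretend the first four dwords of the push constants were inlined. */
        struct inline_info info = { .base = 0, .mask = 0xf };
        uint32_t slot;

        if (lookup_inline(&info, 8, 2, &slot))
                printf("vec2 at byte offset 8 -> inline SGPR slots %u..%u\n",
                       slot, slot + 1);
        return 0;
}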

v2: fixup 16->available_sgprs (Samuel)
fixup dynamic offsets (Alex)
bump AC_UD_MAX_INLINE_PUSH_CONST to 12
handle push consts > 32 better, avoiding an F1 2017 crash

TODO: proper vega support (Samuel)

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/common/ac_nir_to_llvm.c  | 102 +++++++++++++++++++++++++++++++++++----
 src/amd/common/ac_nir_to_llvm.h  |   5 ++
 src/amd/common/ac_shader_info.c  |   5 +-
 src/amd/common/ac_shader_info.h  |   1 +
 src/amd/vulkan/radv_cmd_buffer.c |  75 +++++++++++++++++++++-------
 5 files changed, 159 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6c578de3aca..00ad76a82f7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -92,6 +92,7 @@ struct nir_to_llvm_context {
        LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
        LLVMValueRef ring_offsets;
        LLVMValueRef push_constants;
+       LLVMValueRef inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST];
        LLVMValueRef view_index;
        LLVMValueRef num_work_groups;
        LLVMValueRef workgroup_ids[3];
@@ -243,7 +244,7 @@ static void set_llvm_calling_convention(LLVMValueRef func,
        LLVMSetFunctionCallConv(func, calling_conv);
 }
 
-#define MAX_ARGS 23
+#define MAX_ARGS 32
 struct arg_info {
        LLVMTypeRef types[MAX_ARGS];
        LLVMValueRef *assign[MAX_ARGS];
@@ -538,6 +539,8 @@ struct user_sgpr_info {
        bool need_ring_offsets;
        uint8_t sgpr_count;
        bool indirect_all_descriptor_sets;
+       uint8_t base_inline_push_consts;
+       uint8_t num_inline_push_consts;
 };
 
 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
@@ -609,8 +612,49 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
        } else {
                user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
        }
+
+       if (ctx->shader_info->info.loads_push_constants) {
+               uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
+               if (!ctx->shader_info->info.has_indirect_push_constants &&
+                   !ctx->shader_info->info.loads_dynamic_offsets)
+                       remaining_sgprs += 2;
+
+               if (ctx->options->layout->push_constant_size) {
+                       uint8_t num_32bit_push_consts = (ctx->shader_info->info.max_push_constant_used -
+                                                        ctx->shader_info->info.min_push_constant_used) / 4;
+
+                       if ((ctx->shader_info->info.min_push_constant_used / 4) <= 63 &&
+                           (ctx->shader_info->info.max_push_constant_used / 4) <= 63) {
+                               user_sgpr_info->base_inline_push_consts = ctx->shader_info->info.min_push_constant_used / 4;
+
+                               if (num_32bit_push_consts < remaining_sgprs) {
+                                       user_sgpr_info->num_inline_push_consts = num_32bit_push_consts;
+                                       if (!ctx->shader_info->info.has_indirect_push_constants)
+                                               ctx->shader_info->info.loads_push_constants = false;
+                               } else {
+                                       user_sgpr_info->num_inline_push_consts = remaining_sgprs;
+                               }
+
+                               if (user_sgpr_info->num_inline_push_consts > AC_UD_MAX_INLINE_PUSH_CONST)
+                                       user_sgpr_info->num_inline_push_consts = AC_UD_MAX_INLINE_PUSH_CONST;
+                       }
+               }
+       }
 }
 
+static void
+declare_inline_push_consts(struct nir_to_llvm_context *ctx,
+                          gl_shader_stage stage,
+                          const struct user_sgpr_info *user_sgpr_info,
+                          struct arg_info *args)
+{
+       ctx->shader_info->inline_push_const_mask = (1ULL << user_sgpr_info->num_inline_push_consts) - 1;
+       ctx->shader_info->inline_push_const_base = user_sgpr_info->base_inline_push_consts;
+
+       for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++)
+               add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->inline_push_consts[i]);
+
+}
 static void
 declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
                           gl_shader_stage stage,
@@ -640,10 +684,14 @@ declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
                add_array_arg(args, const_array(type, 32), desc_sets);
        }
 
-       if (ctx->shader_info->info.loads_push_constants) {
+       if (ctx->shader_info->info.loads_push_constants ||
+           ctx->shader_info->info.loads_dynamic_offsets) {
                /* 1 for push constants and dynamic descriptors */
                add_array_arg(args, type, &ctx->push_constants);
        }
+
+       if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX)))
+               declare_inline_push_consts(ctx, stage, user_sgpr_info, args);
 }
 
 static void
@@ -651,6 +699,7 @@ declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx,
                                gl_shader_stage stage,
                                bool has_previous_stage,
                                gl_shader_stage previous_stage,
+                               const struct user_sgpr_info *user_sgpr_info,
                                struct arg_info *args)
 {
        if (!ctx->is_gs_copy_shader &&
@@ -660,6 +709,7 @@ declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx,
                        add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
                                &ctx->vertex_buffers);
                }
+               declare_inline_push_consts(ctx, stage, user_sgpr_info, args);
                add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
                add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance);
                if (ctx->shader_info->info.vs.needs_draw_id) {
@@ -693,6 +743,16 @@ declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
        add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
 }
 
+static void
+set_inline_pushconst_locs(struct nir_to_llvm_context *ctx,
+                         const struct user_sgpr_info *user_sgpr_info,
+                         uint8_t *user_sgpr_idx)
+{
+       ctx->shader_info->user_sgprs_locs.push_const_base = user_sgpr_info->base_inline_push_consts;
+       for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++)
+               set_loc(&ctx->shader_info->user_sgprs_locs.inline_push_consts[i], user_sgpr_idx, 1, 0);
+}
+
 static void
 set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage,
                      bool has_previous_stage, gl_shader_stage previous_stage,
@@ -731,15 +791,21 @@ set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage,
                ctx->shader_info->need_indirect_descriptor_sets = true;
        }
 
-       if (ctx->shader_info->info.loads_push_constants) {
+       if (ctx->shader_info->info.loads_push_constants ||
+           ctx->shader_info->info.loads_dynamic_offsets) {
                set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
        }
+
+
+       if (!((stage == MESA_SHADER_VERTEX) || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX)))
+               set_inline_pushconst_locs(ctx, user_sgpr_info, user_sgpr_idx);
 }
 
 static void
 set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
                           gl_shader_stage stage, bool has_previous_stage,
                           gl_shader_stage previous_stage,
+                          const struct user_sgpr_info *user_sgpr_info,
                           uint8_t *user_sgpr_idx)
 {
        if (!ctx->is_gs_copy_shader &&
@@ -750,6 +816,7 @@ set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
                                       user_sgpr_idx, 2);
                }
 
+               set_inline_pushconst_locs(ctx, user_sgpr_info, user_sgpr_idx);
                unsigned vs_num = 2;
                if (ctx->shader_info->info.vs.needs_draw_id)
                        vs_num++;
@@ -805,7 +872,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                                           previous_stage, &user_sgpr_info,
                                           &args, &desc_sets);
                declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
-                                               previous_stage, &args);
+                                               previous_stage, &user_sgpr_info, &args);
 
                if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
                        add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
@@ -838,7 +905,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                                                   &desc_sets);
                        declare_vs_specific_input_sgprs(ctx, stage,
                                                        has_previous_stage,
-                                                       previous_stage, &args);
+                                                       previous_stage, &user_sgpr_info, &args);
 
                        add_arg(&args, ARG_SGPR, ctx->ac.i32,
                                &ctx->ls_out_layout);
@@ -934,7 +1001,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                        } else {
                                declare_vs_specific_input_sgprs(ctx, stage,
                                                                has_previous_stage,
-                                                               previous_stage,
+                                                               previous_stage, &user_sgpr_info,
                                                                &args);
                        }
 
@@ -1076,7 +1143,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                break;
        case MESA_SHADER_VERTEX:
                set_vs_specific_input_locs(ctx, stage, has_previous_stage,
-                                          previous_stage, &user_sgpr_idx);
+                                          previous_stage, &user_sgpr_info, &user_sgpr_idx);
                if (ctx->view_index)
                        set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
                if (ctx->options->key.vs.as_ls) {
@@ -1088,7 +1155,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                break;
        case MESA_SHADER_TESS_CTRL:
                set_vs_specific_input_locs(ctx, stage, has_previous_stage,
-                                          previous_stage, &user_sgpr_idx);
+                                          previous_stage, &user_sgpr_info, &user_sgpr_idx);
                if (has_previous_stage)
                        set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
                                       &user_sgpr_idx, 1);
@@ -1108,6 +1175,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
                                set_vs_specific_input_locs(ctx, stage,
                                                           has_previous_stage,
                                                           previous_stage,
+                                                          &user_sgpr_info,
                                                           &user_sgpr_idx);
                        else
                                set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
@@ -2371,9 +2439,23 @@ static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
                                              nir_intrinsic_instr *instr)
 {
        LLVMValueRef ptr, addr;
+       LLVMValueRef src0 = get_src(ctx->nir, instr->src[0]);
+       unsigned index = nir_intrinsic_base(instr);
+
+       if (LLVMIsConstant(src0)) {
+               unsigned array_index = index;
+               array_index += LLVMConstIntGetZExtValue(src0);
+               array_index /= 4;
+
+               array_index -= ctx->shader_info->inline_push_const_base;
+               uint64_t bits = (((1ULL << instr->num_components) - 1) << array_index);
+               if ((bits & ctx->shader_info->inline_push_const_mask) == bits) {
+                       return ac_build_gather_values(&ctx->ac, &ctx->inline_push_consts[array_index], instr->num_components);
+               }
+       }
 
-       addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
-       addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
+       addr = LLVMConstInt(ctx->ac.i32, index, 0);
+       addr = LLVMBuildAdd(ctx->builder, addr, src0, "");
 
        ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
        ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index b3ad0a09857..22330fdfbc4 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -127,10 +127,13 @@ enum ac_ud_index {
 
 // Match MAX_SETS from radv_descriptor_set.h
 #define AC_UD_MAX_SETS MAX_SETS
+#define AC_UD_MAX_INLINE_PUSH_CONST 12
 
 struct ac_userdata_locations {
        struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS];
        struct ac_userdata_info shader_data[AC_UD_MAX_UD];
+       struct ac_userdata_info inline_push_consts[AC_UD_MAX_INLINE_PUSH_CONST];
+       uint8_t push_const_base;
 };
 
 struct ac_vs_output_info {
@@ -156,6 +159,8 @@ struct ac_shader_variant_info {
        unsigned num_user_sgprs;
        unsigned num_input_sgprs;
        unsigned num_input_vgprs;
+       uint64_t inline_push_const_mask;
+       uint32_t inline_push_const_base;
        bool need_indirect_descriptor_sets;
        struct {
                struct {
diff --git a/src/amd/common/ac_shader_info.c b/src/amd/common/ac_shader_info.c
index 18fa9e1c94c..fbb46684ae3 100644
--- a/src/amd/common/ac_shader_info.c
+++ b/src/amd/common/ac_shader_info.c
@@ -179,9 +179,10 @@ ac_nir_shader_info_pass(struct nir_shader *nir,
 {
        struct nir_function *func = (struct nir_function *)exec_list_get_head(&nir->functions);
 
-
-       if (options->layout->dynamic_offset_count)
+       if (options->layout->dynamic_offset_count) {
                info->loads_push_constants = true;
+               info->loads_dynamic_offsets = true;
+       }
 
        nir_foreach_variable(variable, &nir->inputs)
                gather_info_input_decl(nir, options, variable, info);
diff --git a/src/amd/common/ac_shader_info.h b/src/amd/common/ac_shader_info.h
index e35cde0ca97..e8ea33f2e3a 100644
--- a/src/amd/common/ac_shader_info.h
+++ b/src/amd/common/ac_shader_info.h
@@ -32,6 +32,7 @@ struct ac_shader_info {
        uint8_t min_push_constant_used;
        uint8_t max_push_constant_used;
        bool has_indirect_push_constants;
+       bool loads_dynamic_offsets;
        bool loads_push_constants;
        bool needs_multiview_view_index;
        bool uses_invocation_id;
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d870a9bedb3..e1953a3a095 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1807,6 +1807,27 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
        assert(cmd_buffer->cs->cdw <= cdw_max);
 }
 
+static struct ac_userdata_info *
+radv_lookup_push_const_sgpr(struct radv_shader_variant *shader,
+                           int idx)
+{
+       idx -= shader->info.user_sgprs_locs.push_const_base;
+       return &shader->info.user_sgprs_locs.inline_push_consts[idx];
+}
+
+static void
+radv_emit_inline_pushconsts(struct radv_cmd_buffer *cmd_buffer,
+                           struct radv_shader_variant *shader,
+                           unsigned base_reg,
+                           int idx, int count, uint32_t *values)
+{
+       struct ac_userdata_info *loc = radv_lookup_push_const_sgpr(shader, idx);
+       assert(loc->sgpr_idx != -1);
+       assert(!loc->indirect);
+       radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
+       radeon_emit_array(cmd_buffer->cs, values, count);
+}
+
 static void
 radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
                     struct radv_pipeline *pipeline,
@@ -1816,36 +1837,56 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
        unsigned offset;
        void *ptr;
        uint64_t va;
+       bool need_push_constants = false;
 
        stages &= cmd_buffer->push_constant_stages;
        if (!stages ||
            (!layout->push_constant_size && !layout->dynamic_offset_count))
                return;
 
-       if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
-                                         16 * layout->dynamic_offset_count,
-                                         256, &offset, &ptr))
-               return;
+       radv_foreach_stage(stage, stages) {
+               if (!pipeline->shaders[stage])
+                       continue;
+
+               need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
+               need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;
 
-       memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
-       if (layout->dynamic_offset_count) {
-               memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
-                      16 * layout->dynamic_offset_count);
+               uint32_t mask = pipeline->shaders[stage]->info.inline_push_const_mask;
+               uint32_t base_reg = pipeline->user_data_0[stage];
+               while (mask) {
+                       int start, count;
+                       u_bit_scan_consecutive_range(&mask, &start, &count);
+                       start += pipeline->shaders[stage]->info.inline_push_const_base;
+                       radv_emit_inline_pushconsts(cmd_buffer, pipeline->shaders[stage], base_reg,
+                                                   start, count, (uint32_t *)&cmd_buffer->push_constants[start * 4]);
+               }
        }
 
-       va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
-       va += offset;
+       if (need_push_constants) {
+               if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+                                                 16 * layout->dynamic_offset_count,
+                                                 256, &offset, &ptr))
+                       return;
 
-       MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-                                                          cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+               memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+               if (layout->dynamic_offset_count) {
+                       memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
+                              16 * layout->dynamic_offset_count);
+               }
 
-       radv_foreach_stage(stage, stages) {
-               if (pipeline->shaders[stage]) {
-                       radv_emit_userdata_address(cmd_buffer, pipeline, stage,
-                                                  AC_UD_PUSH_CONSTANTS, va);
+               va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+               va += offset;
+
+               MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+                                                                  cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+               radv_foreach_stage(stage, stages) {
+                       if (pipeline->shaders[stage]) {
+                               radv_emit_userdata_address(cmd_buffer, pipeline, stage,
+                                                          AC_UD_PUSH_CONSTANTS, va);
+                       }
                }
        }
-
        cmd_buffer->push_constant_stages &= ~stages;
        assert(cmd_buffer->cs->cdw <= cdw_max);
 }
-- 
2.14.3
