From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c                    | 42 ++++++++++++++---------
 src/amd/common/ac_llvm_build.h                    | 14 ++++----
 src/amd/common/ac_nir_to_llvm.c                   | 22 ++++++------
 src/gallium/drivers/radeonsi/si_shader.c          | 34 +++++++++---------
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 +--
 5 files changed, 61 insertions(+), 55 deletions(-)
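
Summary of the API change: ac_build_indexed_load (with its uniform flag) and
ac_build_indexed_load_const are folded into a private ac_build_load_custom
helper and exposed as three variants whose names state the behaviour:
ac_build_load (plain load), ac_build_load_invariant (invariant load), and
ac_build_load_to_sgpr (uniform and invariant, i.e. a load that may go to an
SGPR). A minimal caller-migration sketch in C; ctx, list, index and dw_addr are
assumed to be set up exactly as in the callers touched below:

    /* before */
    rsrc  = ac_build_indexed_load_const(&ctx->ac, list, index);
    value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);

    /* after */
    rsrc  = ac_build_load_to_sgpr(&ctx->ac, list, index);  /* uniform + invariant */
    value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);    /* plain (LDS) load */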

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1d97b09..949f181 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -710,46 +710,54 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
                       ac_build_gep0(ctx, base_ptr, index));
 }
 
 /**
  * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
  * It's equivalent to doing a load from &base_ptr[index].
  *
  * \param base_ptr  Where the array starts.
  * \param index     The element index into the array.
  * \param uniform   Whether the base_ptr and index can be assumed to be
- *                  dynamically uniform
+ *                  dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
  */
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-                     LLVMValueRef base_ptr, LLVMValueRef index,
-                     bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+                    LLVMValueRef index, bool uniform, bool invariant)
 {
-       LLVMValueRef pointer;
+       LLVMValueRef pointer, result;
 
        pointer = ac_build_gep0(ctx, base_ptr, index);
        if (uniform)
                LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
-       return LLVMBuildLoad(ctx->builder, pointer, "");
+       result = LLVMBuildLoad(ctx->builder, pointer, "");
+       if (invariant)
+               LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+       return result;
 }
 
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-                           LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+                          LLVMValueRef index)
 {
-       LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
-       LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
-       return result;
+       return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+                                    LLVMValueRef base_ptr, LLVMValueRef index)
+{
+       return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index)
+{
+       return ac_build_load_custom(ctx, base_ptr, index, true, true);
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
  */
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef rsrc,
                            LLVMValueRef vdata,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ac8ea9c..f0b5875 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -143,28 +143,26 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
              LLVMValueRef base_ptr,
              LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
                       LLVMValueRef base_ptr, LLVMValueRef index,
                       LLVMValueRef value);
 
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-                     LLVMValueRef base_ptr, LLVMValueRef index,
-                     bool uniform);
-
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-                           LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+                          LLVMValueRef index);
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+                                    LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef rsrc,
                            LLVMValueRef vdata,
                            unsigned num_channels,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
                            bool glc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4492d8e..8278486 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -792,21 +792,21 @@ static void create_function(struct nir_to_llvm_context *ctx)
                        } else
                                ctx->descriptor_sets[i] = NULL;
                }
        } else {
                uint32_t desc_sgpr_idx = user_sgpr_idx;
                set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
 
                for (unsigned i = 0; i < num_sets; ++i) {
                        if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
                                set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
-                               ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
+                               ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
 
                        } else
                                ctx->descriptor_sets[i] = NULL;
                }
                ctx->shader_info->need_indirect_descriptor_sets = true;
        }
 
        if (ctx->shader_info->info.needs_push_constants) {
                set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
        }
@@ -2518,21 +2518,21 @@ out:
 
        *const_out = const_offset;
        *indir_out = offset;
 }
 
 static LLVMValueRef
 lds_load(struct nir_to_llvm_context *ctx,
         LLVMValueRef dw_addr)
 {
        LLVMValueRef value;
-       value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+       value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
        return value;
 }
 
 static void
 lds_store(struct nir_to_llvm_context *ctx,
          LLVMValueRef dw_addr, LLVMValueRef value)
 {
        value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
        ac_build_indexed_store(&ctx->ac, ctx->lds,
                               dw_addr, value);
@@ -3733,21 +3733,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
                                         LLVMValueRef sample_id)
 {
        LLVMValueRef result;
        LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
 
        ptr = LLVMBuildBitCast(ctx->builder, ptr,
                               const_array(ctx->v2f32, 64), "");
 
        sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
-       result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
+       result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
 
        return result;
 }
 
 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
 {
        LLVMValueRef values[2];
 
        values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
        values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
@@ -4224,21 +4224,21 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
        assert(stride % type_size == 0);
 
        if (!index)
                index = ctx->i32zero;
 
        index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
 
        list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
        list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
 
-       return ac_build_indexed_load_const(&ctx->ac, list, index);
+       return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
 
 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
                                     const nir_deref_var *deref,
                                     enum ac_descriptor_type desc_type,
                                     bool image, bool write)
 {
        LLVMValueRef index = NULL;
        unsigned constant_index = 0;
        const nir_deref *tail = &deref->deref;
@@ -4921,21 +4921,21 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
                                            ctx->abi.start_instance, "");
                ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
                                            ctx->shader_info->vs.vgpr_comp_cnt);
        } else
                buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
                                            ctx->abi.base_vertex, "");
 
        for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
                t_offset = LLVMConstInt(ctx->i32, index + i, false);
 
-               t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+               t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
                input = ac_build_buffer_load_format(&ctx->ac, t_list,
                                                    buffer_index,
                                                    LLVMConstInt(ctx->i32, 0, false),
                                                    true);
 
                for (unsigned chan = 0; chan < 4; chan++) {
                        LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
                        ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
                                ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
@@ -6211,43 +6211,43 @@ ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
                               outinfo->vs_output_param_offset,
                               VARYING_SLOT_MAX,
                               &outinfo->param_exports);
 }
 
 static void
 ac_setup_rings(struct nir_to_llvm_context *ctx)
 {
        if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
            (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
-               ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
+               ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
        }
 
        if (ctx->is_gs_copy_shader) {
-               ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
+               ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
        }
        if (ctx->stage == MESA_SHADER_GEOMETRY) {
                LLVMValueRef tmp;
-               ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
-               ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
+               ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
+               ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
 
                ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
 
                ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->i32, 2, false), "");
                tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
                tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
                ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
        }
 
        if (ctx->stage == MESA_SHADER_TESS_CTRL ||
            ctx->stage == MESA_SHADER_TESS_EVAL) {
-               ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
-               ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
+               ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
+               ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
        }
 }
 
 static unsigned
 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
                              const struct nir_shader *nir)
 {
        switch (nir->stage) {
        case MESA_SHADER_TESS_CTRL:
                return chip_class >= CIK ? 128 : 64;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 506da6f..f83f45c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -554,21 +554,21 @@ void si_llvm_load_input_vs(
        LLVMValueRef t_offset;
        LLVMValueRef t_list;
        LLVMValueRef vertex_index;
        LLVMValueRef input[3];
 
        /* Load the T list */
        t_list_ptr = LLVMGetParam(ctx->main_fn, ctx->param_vertex_buffers);
 
        t_offset = LLVMConstInt(ctx->i32, input_index, 0);
 
-       t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+       t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
        vertex_index = LLVMGetParam(ctx->main_fn,
                                    ctx->param_vertex_index0 +
                                    input_index);
 
        fix_fetch = ctx->shader->key.mono.vs_fix_fetch[input_index];
 
        /* Do multiple loads for special formats. */
        switch (fix_fetch) {
        case SI_FIX_FETCH_RGB_64_FLOAT:
@@ -1092,26 +1092,26 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
                        values[chan] = lds_load(bld_base, type, chan, dw_addr);
 
                return lp_build_gather_values(&ctx->gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }
 
        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                            LLVMConstInt(ctx->i32, swizzle, 0));
 
-       value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+       value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                                       ctx->i32_1);
-               value2 = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+               value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
                return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
        }
 
        return bitcast(bld_base, type, value);
 }
 
 /**
  * Store to LDS.
  *
  * \param swizzle      offset (typically 0..3)
@@ -1610,21 +1610,21 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 {
        return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
                                    0, 0, 0, true, true);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
 {
        struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
        LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
        LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
-       LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
+       LLVMValueRef resource = ac_build_load_to_sgpr(&ctx->ac, desc, buf_index);
 
        /* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
        LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
        LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
 
        LLVMValueRef pos[4] = {
                buffer_load_const(ctx, resource, offset0),
                buffer_load_const(ctx, resource, offset1),
                LLVMConstReal(ctx->f32, 0),
                LLVMConstReal(ctx->f32, 0)
@@ -1785,21 +1785,21 @@ void si_load_system_value(struct si_shader_context *ctx,
        }
 
        case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
        case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
        {
                LLVMValueRef buf, slot, val[4];
                int i, offset;
 
                slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
                buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
-               buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
+               buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
                offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
 
                for (i = 0; i < 4; i++)
                        val[i] = buffer_load_const(ctx, buf,
                                                   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
                value = lp_build_gather_values(&ctx->gallivm, val, 4);
                break;
        }
 
        case TGSI_SEMANTIC_PRIMID:
@@ -1928,49 +1928,49 @@ void si_declare_compute_memory(struct si_shader_context *ctx,
        LLVMSetAlignment(var, 4);
 
        ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
        LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
                                             ctx->param_const_and_shader_buffers);
 
-       return ac_build_indexed_load_const(&ctx->ac, list_ptr,
-                       LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
+       return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
+                                    LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
 }
 
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 
        index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
        index = LLVMBuildAdd(ctx->ac.builder, index,
                             LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
 
-       return ac_build_indexed_load_const(&ctx->ac, ptr, index);
+       return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
 }
 
 static LLVMValueRef
 load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
                                             ctx->param_const_and_shader_buffers);
 
        index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
        index = LLVMBuildSub(ctx->ac.builder,
                             LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
                             index, "");
 
-       return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
+       return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
 }
 
 static LLVMValueRef fetch_constant(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        const struct tgsi_ind_register *ireg = &reg->Indirect;
@@ -2001,21 +2001,21 @@ static LLVMValueRef fetch_constant(
        idx = reg->Register.Index * 4 + swizzle;
 
        if (reg->Dimension.Indirect) {
                LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
                LLVMValueRef index;
                index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
                                                      reg->Dimension.Index,
                                                      ctx->num_const_buffers);
                index = LLVMBuildAdd(ctx->ac.builder, index,
                                     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
-               bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
+               bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
        } else
                bufp = load_const_buffer_desc(ctx, buf);
 
        if (reg->Register.Indirect) {
                addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
        } else {
                addr = LLVMConstInt(ctx->i32, idx * 4, 0);
        }
 
        return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
@@ -2276,21 +2276,21 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
        unsigned reg_index;
        unsigned chan;
        unsigned const_chan;
        LLVMValueRef base_elt;
        LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
        LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
                                                   SI_VS_CONST_CLIP_PLANES, 0);
-       LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index);
+       LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
 
        for (reg_index = 0; reg_index < 2; reg_index ++) {
                struct ac_export_args *args = &pos[2 + reg_index];
 
                args->out[0] =
                args->out[1] =
                args->out[2] =
                args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
 
                /* Compute dot products of position and user clip plane vectors */
@@ -2433,21 +2433,21 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
                                                    ctx->param_rw_buffers);
 
                for (i = 0; i < 4; i++) {
                        if (!so->stride[i])
                                continue;
 
                        LLVMValueRef offset = LLVMConstInt(ctx->i32,
                                                           SI_VS_STREAMOUT_BUF0 + i, 0);
 
-                       so_buffers[i] = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+                       so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 
                        LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn,
                                                              ctx->param_streamout_offset[i]);
                        so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");
 
                        so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
                                                          LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
                        so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
                }
 
@@ -4717,34 +4717,34 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
                                            ctx->param_rw_buffers);
 
        if (ctx->screen->b.chip_class <= VI &&
            (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
                unsigned ring =
                        ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
                                                             : SI_ES_RING_ESGS;
                LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
 
                ctx->esgs_ring =
-                       ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+                       ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
        }
 
        if (ctx->shader->is_gs_copy_shader) {
                LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
 
                ctx->gsvs_ring[0] =
-                       ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+                       ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
        } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
                const struct si_shader_selector *sel = ctx->shader->selector;
                LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
                LLVMValueRef base_ring;
 
-               base_ring = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+               base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 
                /* The conceptual layout of the GSVS ring is
                 *   v0c0 .. vLv0 v0c1 .. vLc1 ..
                 * but the real memory layout is swizzled across
                 * threads:
                 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
                 *   t16v0c0 ..
                 * Override the buffer descriptor accordingly.
                 */
                LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
@@ -4813,21 +4813,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 
        /* Use the fixed-point gl_FragCoord input.
         * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
         * per coordinate to get the repeating effect.
         */
        address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
        address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
 
        /* Load the buffer descriptor. */
        slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
-       desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot);
+       desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
 
        /* The stipple pattern is 32x32, each row has 32 bits. */
        offset = LLVMBuildMul(builder, address[1],
                              LLVMConstInt(ctx->i32, 4, 0), "");
        row = buffer_load_const(ctx, desc, offset);
        row = ac_to_integer(&ctx->ac, row);
        bit = LLVMBuildLShr(builder, row, address[0], "");
        bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
 
        /* The intrinsic kills the thread if arg < 0. */
@@ -6882,21 +6882,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
        }
 
        /* Compute vertex load indices from instance divisors. */
        LLVMValueRef instance_divisor_constbuf = NULL;
 
        if (key->vs_prolog.states.instance_divisor_is_fetched) {
                LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
                LLVMValueRef buf_index =
                        LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
                instance_divisor_constbuf =
-                       ac_build_indexed_load_const(&ctx->ac, list, buf_index);
+                       ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
        }
 
        for (i = 0; i <= key->vs_prolog.last_input; i++) {
                bool divisor_is_one =
                        key->vs_prolog.states.instance_divisor_is_one & (1u << i);
                bool divisor_is_fetched =
                        key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
                LLVMValueRef index;
 
                if (divisor_is_one || divisor_is_fetched) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index a2b2b87..ec11c75 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -138,21 +138,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                index = LLVMBuildMul(builder, index,
                                     LLVMConstInt(ctx->i32, 2, 0), "");
                index = LLVMBuildAdd(builder, index,
                                     ctx->i32_1, "");
                list = LLVMBuildPointerCast(builder, list,
                                            si_const_array(ctx->v4i32, 0), "");
        } else {
                assert(desc_type == AC_DESC_IMAGE);
        }
 
-       rsrc = ac_build_indexed_load_const(&ctx->ac, list, index);
+       rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
        if (dcc_off)
                rsrc = force_dcc_off(ctx, rsrc);
        return rsrc;
 }
 
 /**
  * Load the resource descriptor for \p image.
  */
 static void
 image_fetch_rsrc(
@@ -1127,21 +1127,21 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
                break;
        case AC_DESC_SAMPLER:
                /* The sampler state is at [12:15]. */
                index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
                index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
                list = LLVMBuildPointerCast(builder, list,
                                            si_const_array(ctx->v4i32, 0), "");
                break;
        }
 
-       return ac_build_indexed_load_const(&ctx->ac, list, index);
+       return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
 
 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
  *
  * SI-CI:
  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
  *   filtering manually. The driver sets img7 to a mask clearing
  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
  *     s_and_b32 samp0, samp0, img7
  *
-- 
2.7.4
