From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c                    | 42 ++++++++++++++---------
 src/amd/common/ac_llvm_build.h                    | 14 ++++----
 src/amd/common/ac_nir_to_llvm.c                   | 22 ++++++------
 src/gallium/drivers/radeonsi/si_shader.c          | 34 +++++++++---------
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  4 +--
 5 files changed, 61 insertions(+), 55 deletions(-)
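A quick orientation for the mechanical renames below: ac_build_indexed_load(ctx, ptr, idx, false) becomes ac_build_load, ac_build_indexed_load_const becomes ac_build_load_to_sgpr (uniform + invariant), and ac_build_load_invariant is the new middle ground (invariant but not uniform). A minimal caller sketch, assuming an initialized struct ac_llvm_context with an i32 type member as declared in ac_llvm_build.h; load_ring_descriptor and load_lds_dword are illustrative names, not functions added by this patch:

  #include "ac_llvm_build.h"

  /* Descriptor tables are read-only and indexed dynamically uniformly, so
   * ask for the SGPR variant: it tags the pointer with the uniform metadata
   * kind and the load with the invariant one. */
  static LLVMValueRef
  load_ring_descriptor(struct ac_llvm_context *ctx, LLVMValueRef ring_offsets,
                       unsigned slot)
  {
          return ac_build_load_to_sgpr(ctx, ring_offsets,
                                       LLVMConstInt(ctx->i32, slot, false));
  }

  /* LDS contents change at runtime, so use the plain variant, which attaches
   * no metadata at all. */
  static LLVMValueRef
  load_lds_dword(struct ac_llvm_context *ctx, LLVMValueRef lds,
                 LLVMValueRef dw_addr)
  {
          return ac_build_load(ctx, lds, dw_addr);
  }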
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 1d97b09..949f181 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -710,46 +710,54 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
 			    ac_build_gep0(ctx, base_ptr, index));
 }
 
 /**
  * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
  * It's equivalent to doing a load from &base_ptr[index].
  *
  * \param base_ptr	Where the array starts.
  * \param index	The element index into the array.
  * \param uniform	Whether the base_ptr and index can be assumed to be
- *			dynamically uniform
+ *			dynamically uniform (i.e. load to an SGPR)
+ * \param invariant	Whether the load is invariant (no other opcodes affect it)
  */
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-		      LLVMValueRef base_ptr, LLVMValueRef index,
-		      bool uniform)
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+		     LLVMValueRef index, bool uniform, bool invariant)
 {
-	LLVMValueRef pointer;
+	LLVMValueRef pointer, result;
 
 	pointer = ac_build_gep0(ctx, base_ptr, index);
 	if (uniform)
 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
-	return LLVMBuildLoad(ctx->builder, pointer, "");
+	result = LLVMBuildLoad(ctx->builder, pointer, "");
+	if (invariant)
+		LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+	return result;
 }
 
-/**
- * Do a load from &base_ptr[index], but also add a flag that it's loading
- * a constant from a dynamically uniform index.
- */
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-			    LLVMValueRef base_ptr, LLVMValueRef index)
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+			   LLVMValueRef index)
 {
-	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
-	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
-	return result;
+	return ac_build_load_custom(ctx, base_ptr, index, false, false);
+}
+
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+				     LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, false, true);
+}
+
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, true, true);
+}
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
  */
 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 				 LLVMValueRef rsrc,
 				 LLVMValueRef vdata,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index ac8ea9c..f0b5875 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -143,28 +143,26 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
 	      LLVMValueRef base_ptr,
 	      LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
 		       LLVMValueRef base_ptr, LLVMValueRef index,
 		       LLVMValueRef value);
 
-LLVMValueRef
-ac_build_indexed_load(struct ac_llvm_context *ctx,
-		      LLVMValueRef base_ptr, LLVMValueRef index,
-		      bool uniform);
-
-LLVMValueRef
-ac_build_indexed_load_const(struct ac_llvm_context *ctx,
-			    LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+			   LLVMValueRef index);
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+				     LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    LLVMValueRef rsrc,
 			    LLVMValueRef vdata,
 			    unsigned num_channels,
 			    LLVMValueRef voffset,
 			    LLVMValueRef soffset,
 			    unsigned inst_offset,
 			    bool glc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 4492d8e..8278486 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -792,21 +792,21 @@ static void create_function(struct nir_to_llvm_context *ctx)
 			} else
 				ctx->descriptor_sets[i] = NULL;
 		}
 	} else {
 		uint32_t desc_sgpr_idx = user_sgpr_idx;
 		set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
 
 		for (unsigned i = 0; i < num_sets; ++i) {
 			if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
 				set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
-				ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
+				ctx->descriptor_sets[i] = ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
 			} else
 				ctx->descriptor_sets[i] = NULL;
 		}
 		ctx->shader_info->need_indirect_descriptor_sets = true;
 	}
 
 	if (ctx->shader_info->info.needs_push_constants) {
 		set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
 	}
@@ -2518,21 +2518,21 @@ out:
 	*const_out = const_offset;
 	*indir_out = offset;
 }
 
 static LLVMValueRef
 lds_load(struct nir_to_llvm_context *ctx,
 	 LLVMValueRef dw_addr)
 {
 	LLVMValueRef value;
-	value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
 	return value;
 }
 
 static void
 lds_store(struct nir_to_llvm_context *ctx,
 	  LLVMValueRef dw_addr, LLVMValueRef value)
 {
 	value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
 	ac_build_indexed_store(&ctx->ac, ctx->lds, dw_addr, value);
@@ -3733,21 +3733,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
 
 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
 					 LLVMValueRef sample_id)
 {
 	LLVMValueRef result;
 	LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
 
 	ptr = LLVMBuildBitCast(ctx->builder, ptr, const_array(ctx->v2f32, 64), "");
 
 	sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
-	result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
+	result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
 
 	return result;
 }
 
 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
 {
 	LLVMValueRef values[2];
 	values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
 	values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
@@ -4224,21 +4224,21 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
 
 	assert(stride % type_size == 0);
 
 	if (!index)
 		index = ctx->i32zero;
 
 	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
 
 	list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
 	list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
 
-	return ac_build_indexed_load_const(&ctx->ac, list, index);
+	return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
 
 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
 				     const nir_deref_var *deref,
 				     enum ac_descriptor_type desc_type,
 				     bool image, bool write)
 {
 	LLVMValueRef index = NULL;
 	unsigned constant_index = 0;
 	const nir_deref *tail = &deref->deref;
@@ -4921,21 +4921,21 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
 				       ctx->abi.start_instance, "");
 		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
 							  ctx->shader_info->vs.vgpr_comp_cnt);
 	} else
 		buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
 					    ctx->abi.base_vertex, "");
 
 	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
 		t_offset = LLVMConstInt(ctx->i32, index + i, false);
 
-		t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
 		input = ac_build_buffer_load_format(&ctx->ac, t_list,
 						    buffer_index,
 						    LLVMConstInt(ctx->i32, 0, false),
 						    true);
 
 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
 			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
 				ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
@@ -6211,43 +6211,43 @@ ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
 					       outinfo->vs_output_param_offset,
 					       VARYING_SLOT_MAX,
 					       &outinfo->param_exports);
 }
 
 static void
 ac_setup_rings(struct nir_to_llvm_context *ctx)
 {
 	if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
 	    (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
-		ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
+		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
 	}
 
 	if (ctx->is_gs_copy_shader) {
-		ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
+		ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
 	}
 
 	if (ctx->stage == MESA_SHADER_GEOMETRY) {
 		LLVMValueRef tmp;
-		ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
-		ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
+		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
+		ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
 
 		ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
 
 		ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries,
 							LLVMConstInt(ctx->i32, 2, false), "");
 
 		tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
 		tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
 		ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
 	}
 
 	if (ctx->stage == MESA_SHADER_TESS_CTRL ||
 	    ctx->stage == MESA_SHADER_TESS_EVAL) {
-		ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
-		ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
+		ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
+		ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
 	}
 }
 
 static unsigned
 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
 			      const struct nir_shader *nir)
 {
 	switch (nir->stage) {
 	case MESA_SHADER_TESS_CTRL:
 		return chip_class >= CIK ? 128 : 64;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 506da6f..f83f45c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -554,21 +554,21 @@ void si_llvm_load_input_vs(
 	LLVMValueRef t_offset;
 	LLVMValueRef t_list;
 	LLVMValueRef vertex_index;
 	LLVMValueRef input[3];
 
 	/* Load the T list */
 	t_list_ptr = LLVMGetParam(ctx->main_fn, ctx->param_vertex_buffers);
 
 	t_offset = LLVMConstInt(ctx->i32, input_index, 0);
 
-	t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
+	t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
 
 	vertex_index = LLVMGetParam(ctx->main_fn,
 				    ctx->param_vertex_index0 + input_index);
 
 	fix_fetch = ctx->shader->key.mono.vs_fix_fetch[input_index];
 
 	/* Do multiple loads for special formats. */
 	switch (fix_fetch) {
 	case SI_FIX_FETCH_RGB_64_FLOAT:
@@ -1092,26 +1092,26 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
 
 		for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
 			values[chan] = lds_load(bld_base, type, chan, dw_addr);
 
 		return lp_build_gather_values(&ctx->gallivm, values,
 					      TGSI_NUM_CHANNELS);
 	}
 
 	dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
 			       LLVMConstInt(ctx->i32, swizzle, 0));
 
-	value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+	value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
 	if (tgsi_type_is_64bit(type)) {
 		LLVMValueRef value2;
 		dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
 				       ctx->i32_1);
-		value2 = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
+		value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
 		return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
 	}
 
 	return bitcast(bld_base, type, value);
 }
 
 /**
  * Store to LDS.
  *
  * \param swizzle	offset (typically 0..3)
@@ -1610,21 +1610,21 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
 {
 	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
 				    0, 0, 0, true, true);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id)
 {
 	struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
 	LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
 	LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
-	LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
+	LLVMValueRef resource = ac_build_load_to_sgpr(&ctx->ac, desc, buf_index);
 
 	/* offset = sample_id * 8  (8 = 2 floats containing samplepos.xy) */
 	LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8);
 	LLVMValueRef offset1 = LLVMBuildAdd(ctx->ac.builder, offset0, LLVMConstInt(ctx->i32, 4, 0), "");
 
 	LLVMValueRef pos[4] = {
 		buffer_load_const(ctx, resource, offset0),
 		buffer_load_const(ctx, resource, offset1),
 		LLVMConstReal(ctx->f32, 0),
 		LLVMConstReal(ctx->f32, 0)
@@ -1785,21 +1785,21 @@ void si_load_system_value(struct si_shader_context *ctx,
 	}
 
 	case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
 	case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
 	{
 		LLVMValueRef buf, slot, val[4];
 		int i, offset;
 
 		slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
 		buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
-		buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
+		buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
 		offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
 
 		for (i = 0; i < 4; i++)
 			val[i] = buffer_load_const(ctx, buf,
 						   LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
 		value = lp_build_gather_values(&ctx->gallivm, val, 4);
 		break;
 	}
 
 	case TGSI_SEMANTIC_PRIMID:
@@ -1928,49 +1928,49 @@ void si_declare_compute_memory(struct si_shader_context *ctx,
 	LLVMSetAlignment(var, 4);
 
 	ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 }
 
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
 	LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
 					     ctx->param_const_and_shader_buffers);
 
-	return ac_build_indexed_load_const(&ctx->ac, list_ptr,
-					   LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
+	return ac_build_load_to_sgpr(&ctx->ac, list_ptr,
+				     LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0));
 }
 
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 
 	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
 	index = LLVMBuildAdd(ctx->ac.builder, index,
 			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
 
-	return ac_build_indexed_load_const(&ctx->ac, ptr, index);
+	return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
 }
 
 static LLVMValueRef
 load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
 {
 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
 					     ctx->param_const_and_shader_buffers);
 
 	index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
 	index = LLVMBuildSub(ctx->ac.builder,
 			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
 			     index, "");
 
-	return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
+	return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
 }
 
 static LLVMValueRef fetch_constant(
 	struct lp_build_tgsi_context *bld_base,
 	const struct tgsi_full_src_register *reg,
 	enum tgsi_opcode_type type,
 	unsigned swizzle)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_ind_register *ireg = &reg->Indirect;
@@ -2001,21 +2001,21 @@ static LLVMValueRef fetch_constant(
 	idx = reg->Register.Index * 4 + swizzle;
 
 	if (reg->Dimension.Indirect) {
 		LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 		LLVMValueRef index;
 		index = si_get_bounded_indirect_index(ctx, &reg->DimIndirect,
 						      reg->Dimension.Index,
 						      ctx->num_const_buffers);
 		index = LLVMBuildAdd(ctx->ac.builder, index,
 				     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
-		bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index);
+		bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index);
 	} else
 		bufp = load_const_buffer_desc(ctx, buf);
 
 	if (reg->Register.Indirect) {
 		addr = si_get_indirect_index(ctx, ireg, 16, idx * 4);
 	} else {
 		addr = LLVMConstInt(ctx->i32, idx * 4, 0);
 	}
 
 	return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
@@ -2276,21 +2276,21 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct lp_build_context *base = &bld_base->base;
 	unsigned reg_index;
 	unsigned chan;
 	unsigned const_chan;
 	LLVMValueRef base_elt;
 	LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
 	LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
 						   SI_VS_CONST_CLIP_PLANES, 0);
-	LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index);
+	LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index);
 
 	for (reg_index = 0; reg_index < 2; reg_index ++) {
 		struct ac_export_args *args = &pos[2 + reg_index];
 
 		args->out[0] = args->out[1] =
 		args->out[2] = args->out[3] = LLVMConstReal(ctx->f32, 0.0f);
 
 		/* Compute dot products of position and user clip plane vectors */
@@ -2433,21 +2433,21 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx,
 	LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
 					    ctx->param_rw_buffers);
 
 	for (i = 0; i < 4; i++) {
 		if (!so->stride[i])
 			continue;
 
 		LLVMValueRef offset = LLVMConstInt(ctx->i32,
 						   SI_VS_STREAMOUT_BUF0 + i, 0);
 
-		so_buffers[i] = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+		so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 
 		LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn,
 						      ctx->param_streamout_offset[i]);
 		so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), "");
 
 		so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
 						  LLVMConstInt(ctx->i32, so->stride[i]*4, 0), "");
 		so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
 	}
@@ -4717,34 +4717,34 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
 					    ctx->param_rw_buffers);
 
 	if (ctx->screen->b.chip_class <= VI &&
 	    (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
 		unsigned ring = ctx->type == PIPE_SHADER_GEOMETRY ?
 			SI_GS_RING_ESGS : SI_ES_RING_ESGS;
 		LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0);
 
 		ctx->esgs_ring =
-			ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+			ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 	}
 
 	if (ctx->shader->is_gs_copy_shader) {
 		LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
 
 		ctx->gsvs_ring[0] =
-			ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+			ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 	} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
 		const struct si_shader_selector *sel = ctx->shader->selector;
 		LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
 		LLVMValueRef base_ring;
 
-		base_ring = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
+		base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
 
 		/* The conceptual layout of the GSVS ring is
 		 *   v0c0 .. vLv0 v0c1 .. vLc1 ..
 		 * but the real memory layout is swizzled across
 		 * threads:
 		 *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
 		 *   t16v0c0 ..
 		 * Override the buffer descriptor accordingly.
 		 */
 		LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2);
@@ -4813,21 +4813,21 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
 	/* Use the fixed-point gl_FragCoord input.
 	 * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
 	 * per coordinate to get the repeating effect.
 	 */
 	address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5);
 	address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);
 
 	/* Load the buffer descriptor. */
 	slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
-	desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot);
+	desc = ac_build_load_to_sgpr(&ctx->ac, param_rw_buffers, slot);
 
 	/* The stipple pattern is 32x32, each row has 32 bits. */
 	offset = LLVMBuildMul(builder, address[1],
 			      LLVMConstInt(ctx->i32, 4, 0), "");
 	row = buffer_load_const(ctx, desc, offset);
 	row = ac_to_integer(&ctx->ac, row);
 	bit = LLVMBuildLShr(builder, row, address[0], "");
 	bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");
 
 	/* The intrinsic kills the thread if arg < 0. */
@@ -6882,21 +6882,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 	}
 
 	/* Compute vertex load indices from instance divisors.
 	 */
 	LLVMValueRef instance_divisor_constbuf = NULL;
 
 	if (key->vs_prolog.states.instance_divisor_is_fetched) {
 		LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
 		LLVMValueRef buf_index =
 			LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 0);
 		instance_divisor_constbuf =
-			ac_build_indexed_load_const(&ctx->ac, list, buf_index);
+			ac_build_load_to_sgpr(&ctx->ac, list, buf_index);
 	}
 
 	for (i = 0; i <= key->vs_prolog.last_input; i++) {
 		bool divisor_is_one =
 			key->vs_prolog.states.instance_divisor_is_one & (1u << i);
 		bool divisor_is_fetched =
 			key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
 		LLVMValueRef index;
 
 		if (divisor_is_one || divisor_is_fetched) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index a2b2b87..ec11c75 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -138,21 +138,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
 		index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
 		list = LLVMBuildPointerCast(builder, list,
 					    si_const_array(ctx->v4i32, 0), "");
 	} else {
 		assert(desc_type == AC_DESC_IMAGE);
 	}
 
-	rsrc = ac_build_indexed_load_const(&ctx->ac, list, index);
+	rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
 	if (dcc_off)
 		rsrc = force_dcc_off(ctx, rsrc);
 	return rsrc;
 }
 
 /**
  * Load the resource descriptor for \p image.
  */
 static void
 image_fetch_rsrc(
@@ -1127,21 +1127,21 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
 		break;
 	case AC_DESC_SAMPLER:
 		/* The sampler state is at [12:15]. */
 		index = LLVMBuildMul(builder, index,
 				     LLVMConstInt(ctx->i32, 4, 0), "");
 		index = LLVMBuildAdd(builder, index,
 				     LLVMConstInt(ctx->i32, 3, 0), "");
 		list = LLVMBuildPointerCast(builder, list,
 					    si_const_array(ctx->v4i32, 0), "");
 		break;
 	}
 
-	return ac_build_indexed_load_const(&ctx->ac, list, index);
+	return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
 
 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
  *
  * SI-CI:
  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
  *   filtering manually. The driver sets img7 to a mask clearing
  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
  *   s_and_b32 samp0, samp0, img7
  *
-- 
2.7.4
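P.S. for reviewers of the si_llvm_emit_polygon_stipple hunk above: the comments there describe the bit test the function builds in LLVM IR. A plain-C sketch of the same arithmetic, for clarity only; the function name and CPU-side form are illustrative, not mesa API:

  #include <stdbool.h>
  #include <stdint.h>

  /* The 32x32 stipple pattern repeats, so only the low 5 bits of each
   * fixed-point coordinate matter; each row is one 32-bit word. */
  static bool
  stipple_pixel_visible(const uint32_t pattern[32],
                        unsigned frag_x, unsigned frag_y)
  {
          unsigned x = frag_x & 31;   /* unpack_param(ctx, .., 0, 5)  */
          unsigned y = frag_y & 31;   /* unpack_param(ctx, .., 16, 5) */

          /* row = pattern[y]; the shader kills the thread if the bit is 0. */
          return (pattern[y] >> x) & 1;
  }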