From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/ac_llvm_build.c | 46 +++++++++++++++++++++++--------- src/amd/common/ac_llvm_build.h | 3 ++- src/amd/common/ac_nir_to_llvm.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 23 +++++++--------- 4 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 3df9f53..237e929 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -626,47 +626,69 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, unsigned glc, unsigned slc, - bool can_speculate) + bool can_speculate, + bool allow_smem) { + LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0); + if (voffset) + offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); + if (soffset) + offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); + + /* TODO: VI and later generations can use SMEM with GLC=1.*/ + if (allow_smem && !glc && !slc) { + assert(vindex == NULL); + + LLVMValueRef result[4]; + + for (int i = 0; i < num_channels; i++) { + if (i) { + offset = LLVMBuildAdd(ctx->builder, offset, + LLVMConstInt(ctx->i32, 4, 0), ""); + } + LLVMValueRef args[2] = {rsrc, offset}; + result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32", + ctx->f32, args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); + } + if (num_channels == 1) + return result[0]; + + if (num_channels == 3) + result[num_channels++] = LLVMGetUndef(ctx->f32); + return ac_build_gather_values(ctx, result, num_channels); + } + unsigned func = CLAMP(num_channels, 1, 3) - 1; LLVMValueRef args[] = { LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), - LLVMConstInt(ctx->i32, inst_offset, 0), + offset, LLVMConstInt(ctx->i1, glc, 0), LLVMConstInt(ctx->i1, slc, 0) }; LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), ctx->v4f32}; const char *type_names[] = {"f32", "v2f32", "v4f32"}; char name[256]; - if (voffset) { - args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset, - ""); - } - - if (soffset) { - args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset, - ""); - } - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", type_names[func]); return ac_build_intrinsic(ctx, name, types[func], args, ARRAY_SIZE(args), /* READNONE means writes can't affect it, while * READONLY means that writes can affect it. */ can_speculate && HAVE_LLVM >= 0x0400 ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index c1b5f3d..ebb78fb 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -136,21 +136,22 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, unsigned glc, unsigned slc, - bool can_speculate); + bool can_speculate, + bool allow_smem); LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vindex, LLVMValueRef voffset, bool can_speculate); LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 8ae0a75..28ba47d 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2816,21 +2816,21 @@ load_tes_input(struct nir_to_llvm_context *ctx, param = shader_io_get_unique_index(instr->variables[0]->var->data.location); if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { const_index -= 3; param++; } buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, vertex_index, indir_index); result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL, - buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true); + buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); result = trim_vector(ctx, result, instr->num_components); result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), ""); return result; } static LLVMValueRef load_gs_input(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef indir_index, vtx_offset; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index fe0cd4a..c2b3cae 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -826,39 +826,39 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, LLVMValueRef base, bool can_speculate) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; LLVMValueRef value, value2; LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type); LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4); if (swizzle == ~0) { value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, - 0, 1, 0, can_speculate); + 0, 1, 0, can_speculate, false); return LLVMBuildBitCast(gallivm->builder, value, vec_type, ""); } if (!tgsi_type_is_64bit(type)) { value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, - 0, 1, 0, can_speculate); + 0, 1, 0, can_speculate, false); value = LLVMBuildBitCast(gallivm->builder, value, vec_type, ""); return LLVMBuildExtractElement(gallivm->builder, value, LLVMConstInt(ctx->i32, swizzle, 0), ""); } value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, - swizzle * 4, 1, 0, can_speculate); + swizzle * 4, 1, 0, can_speculate, false); value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, - swizzle * 4 + 4, 1, 0, can_speculate); + swizzle * 4 + 4, 1, 0, can_speculate, false); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } /** * Load from LDS. * * \param type output value type * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4 * \param dw_addr address in dwords @@ -1147,28 +1147,28 @@ static LLVMValueRef fetch_input_gs( vtx_offset_param += ctx->param_gs_vtx2_offset - 2; } vtx_offset = lp_build_mul_imm(uint, LLVMGetParam(ctx->main_fn, vtx_offset_param), 4); soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0); value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0, - vtx_offset, soffset, 0, 1, 0, true); + vtx_offset, soffset, 0, 1, 0, true, false); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0); value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0, vtx_offset, soffset, - 0, 1, 0, true); + 0, 1, 0, true, false); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } return LLVMBuildBitCast(gallivm->builder, value, tgsi2llvmtype(bld_base, type), ""); } static int lookup_interp_param_index(unsigned interpolate, unsigned location) { @@ -1382,26 +1382,22 @@ static LLVMValueRef get_sample_id(struct si_shader_context *ctx) } /** * Load a dword from a constant buffer. */ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource, LLVMValueRef offset) { - LLVMBuilderRef builder = ctx->gallivm.builder; - LLVMValueRef args[2] = {resource, offset}; - - return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2, - LP_FUNC_ATTR_READNONE | - LP_FUNC_ATTR_LEGACY); + return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL, + 0, 0, 0, true, true); } static LLVMValueRef load_sample_position(struct si_shader_context *ctx, LLVMValueRef sample_id) { struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0); LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index); @@ -5201,21 +5197,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, } LLVMValueRef soffset = LLVMConstInt(ctx.i32, offset * gs_selector->gs_max_out_vertices * 16 * 4, 0); offset++; outputs[i].values[chan] = ac_build_buffer_load(&ctx.ac, ctx.gsvs_ring[0], 1, ctx.i32_0, voffset, - soffset, 0, 1, 1, true); + soffset, 0, 1, 1, + true, false); } } /* Streamout and exports. */ if (gs_selector->so.num_outputs) { si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream); } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev