From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/ac_llvm_build.c | 6 ++++++ src/amd/common/ac_llvm_build.h | 2 ++ src/amd/common/ac_nir_to_llvm.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader.c | 25 ++++++++++++++----------- 4 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 4c9beda..305abd3 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1765,20 +1765,26 @@ void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE), "lds"); } LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr) { return ac_build_load_custom(ctx, ctx->lds, dw_addr, false, false, true); } +LLVMValueRef ac_lds_load_invariant(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr) +{ + return ac_build_load_custom(ctx, ctx->lds, dw_addr, false, true, false); +} + void ac_lds_store_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value) { ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), true); } void ac_lds_store_writeonly(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value) { ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), false); diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 25a540a..3bd085c 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -286,20 +286,22 @@ void ac_get_image_intr_name(const char *base_name, void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn, uint8_t *vs_output_param_offset, uint32_t num_outputs, uint8_t *num_param_exports); void ac_init_exec_full_mask(struct ac_llvm_context *ctx); void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr); +LLVMValueRef ac_lds_load_invariant(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr); void ac_lds_store_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); void ac_lds_store_writeonly(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); #ifdef __cplusplus } diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 3f41b9f..b4d840f 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2719,21 +2719,21 @@ load_tcs_input(struct nir_to_llvm_context *ctx, false, NULL, per_vertex ? &vertex_index : NULL, &const_index, &indir_index); stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); dw_addr = get_tcs_in_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, indir_index); unsigned comp = instr->variables[0]->var->data.location_frac; for (unsigned i = 0; i < instr->num_components + comp; i++) { - value[i] = ac_lds_load_volatile(&ctx->ac, dw_addr); + value[i] = ac_lds_load_invariant(&ctx->ac, dw_addr); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); } result = build_varying_gather_values(&ctx->ac, value, instr->num_components, comp); result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); return result; } static LLVMValueRef load_tcs_output(struct nir_to_llvm_context *ctx, @@ -2901,21 +2901,21 @@ load_gs_input(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->ac.i32, 4, false), ""); param = shader_io_get_unique_index(instr->variables[0]->var->data.location); unsigned comp = instr->variables[0]->var->data.location_frac; for (unsigned i = comp; i < instr->num_components + comp; i++) { if (ctx->ac.chip_class >= GFX9) { LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); - value[i] = ac_lds_load_volatile(&ctx->ac, dw_addr); + value[i] = ac_lds_load_invariant(&ctx->ac, dw_addr); } else { args[0] = ctx->esgs_ring; args[1] = vtx_offset; args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false); args[3] = ctx->ac.i32_0; args[4] = ctx->ac.i32_1; /* OFFEN */ args[5] = ctx->ac.i32_0; /* IDXEN */ args[6] = ctx->ac.i32_1; /* GLC */ args[7] = ctx->ac.i32_0; /* SLC */ args[8] = ctx->ac.i32_0; /* TFE */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 98ac914..05c95a6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1069,48 +1069,51 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, /** * Load from LDS. * * \param type output value type * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4 * \param dw_addr address in dwords */ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, unsigned swizzle, - LLVMValueRef dw_addr) + LLVMValueRef dw_addr, bool Volatile) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef value; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) - values[chan] = lds_load(bld_base, type, chan, dw_addr); + values[chan] = lds_load(bld_base, type, chan, dw_addr, Volatile); return lp_build_gather_values(&ctx->gallivm, values, TGSI_NUM_CHANNELS); } /* Split 64-bit loads. */ if (tgsi_type_is_64bit(type)) { LLVMValueRef lo, hi; - lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr); - hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr); + lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr, Volatile); + hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr, Volatile); return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi); } dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, LLVMConstInt(ctx->i32, swizzle, 0)); - value = ac_lds_load_volatile(&ctx->ac, dw_addr); + if (Volatile) + value = ac_lds_load_volatile(&ctx->ac, dw_addr); + else + value = ac_lds_load_invariant(&ctx->ac, dw_addr); return bitcast(bld_base, type, value); } /** * Store to LDS. * * \param swizzle offset (typically 0..3) * \param dw_addr address in dwords * \param value value to store @@ -1159,41 +1162,41 @@ static LLVMValueRef fetch_input_tcs( const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef dw_addr, stride; stride = get_tcs_in_vertex_dw_stride(ctx); dw_addr = get_tcs_in_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr); - return lds_load(bld_base, type, swizzle, dw_addr); + return lds_load(bld_base, type, swizzle, dw_addr, false); } static LLVMValueRef fetch_output_tcs( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef dw_addr, stride; if (reg->Register.Dimension) { stride = get_tcs_out_vertex_dw_stride(ctx); dw_addr = get_tcs_out_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr); } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr); } - return lds_load(bld_base, type, swizzle, dw_addr); + return lds_load(bld_base, type, swizzle, dw_addr, true); } static LLVMValueRef fetch_input_tes( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef buffer, base, addr; @@ -1343,21 +1346,21 @@ static LLVMValueRef fetch_input_gs( vtx_offset = unpack_param(ctx, ctx->param_gs_vtx45_offset, index % 2 ? 16 : 0, 16); break; default: assert(0); return NULL; } vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset, LLVMConstInt(ctx->i32, param * 4, 0), ""); - return lds_load(bld_base, type, swizzle, vtx_offset); + return lds_load(bld_base, type, swizzle, vtx_offset, false); } /* GFX6: input load from the ESGS ring in memory. */ if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = fetch_input_gs(bld_base, reg, type, chan); } return lp_build_gather_values(&ctx->gallivm, values, @@ -2754,21 +2757,21 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) LLVMValueRef lds_ptr = LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->i32, 4 * i, 0), ""); LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->i32, i, 0)); LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0, - lds_ptr); + lds_ptr, false); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0, 1, 0, true, false); } } static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, LLVMValueRef tcs_out_current_patch_data_offset, @@ -2840,25 +2843,25 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, lds_base = tcs_out_current_patch_data_offset; lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->i32, tess_inner_index * 4, 0), ""); lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->i32, tess_outer_index * 4, 0), ""); for (i = 0; i < outer_comps; i++) { outer[i] = out[i] = - lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); + lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer, true); } for (i = 0; i < inner_comps; i++) { inner[i] = out[outer_comps+i] = - lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner); + lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner, true); } } if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { /* For isolines, the hardware expects tess factors in the * reverse order from what GLSL / TGSI specify. */ LLVMValueRef tmp = out[0]; out[0] = out[1]; out[1] = tmp; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev