With potentially more than one wave working on a patch, a real barrier is needed in the tessellation control shader. Also add a barrier before loading the tessellation factors in order to write them to the TF ring.
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/gallium/drivers/radeonsi/si_shader.c | 48 ++++++++++---------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5728be0..9c13637 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2499,6 +2499,19 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) } } +static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + + lp_build_intrinsic(gallivm->builder, + HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" + : "llvm.AMDGPU.barrier.local", + ctx->voidt, NULL, 0, LLVMNoUnwindAttribute); +} + static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, @@ -2513,6 +2526,8 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, unsigned stride, outer_comps, inner_comps, i; struct lp_build_if_state if_ctx; + si_llvm_emit_barrier(NULL, bld_base, NULL); + /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. * @@ -3198,18 +3213,6 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); -/* Prevent optimizations (at least of memory accesses) across the current - * point in the program by emitting empty inline assembly that is marked as - * having side effects. 
- */ -static void emit_optimization_barrier(struct si_shader_context *ctx) -{ - LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder; - LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); - LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); - LLVMBuildCall(builder, inlineasm, NULL, 0, ""); -} - static void emit_waitcnt(struct si_shader_context *ctx) { struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; @@ -5139,27 +5142,6 @@ static void si_llvm_emit_primitive( ctx->voidt, args, 2, LLVMNoUnwindAttribute); } -static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - - /* The real barrier instruction isn’t needed, because an entire patch - * always fits into a single wave. - */ - if (ctx->type == PIPE_SHADER_TESS_CTRL) { - emit_optimization_barrier(ctx); - return; - } - - lp_build_intrinsic(gallivm->builder, - HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" - : "llvm.AMDGPU.barrier.local", - ctx->voidt, NULL, 0, LLVMNoUnwindAttribute); -} - static const struct lp_build_tgsi_action tex_action = { .fetch_args = tex_fetch_args, .emit = build_tex_intrinsic, -- 2.8.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev