From: Marek Olšák <marek.ol...@amd.com>

This restores performance for the drirc workaround, i.e. KILL_IF does:

    visible = src0 >= 0;
    kill_flag &= visible;            // accumulate kills
    amdgcn_kill(wqm_vote(visible));  // kill fully dead quads only

And all helper pixels are killed at the end of the shader:

    amdgcn_kill(kill_flag);
---
 src/amd/common/ac_llvm_build.c                    | 7 +++++++
 src/amd/common/ac_llvm_build.h                    | 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 6 ++++++
 3 files changed, 14 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 0550f80..2f8f59f 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1398,20 +1398,27 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, v2f16, args, 2, AC_FUNC_ATTR_READNONE); return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_LEGACY); } +LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1) +{ + assert(HAVE_LLVM >= 0x0600); + return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, + &i1, 1, AC_FUNC_ATTR_READNONE); +} + void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1) { if (HAVE_LLVM >= 0x0600) { ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0); return; } LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1, LLVMConstReal(ctx->f32, 1), diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index b721782..088a986 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -258,20 +258,21 @@ struct ac_image_args { LLVMValueRef addr; unsigned dmask; bool unorm; bool da; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); +LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1); void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1); LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset, LLVMValueRef width, bool is_signed); void 
ac_get_image_intr_name(const char *base_name, LLVMTypeRef data_type, LLVMTypeRef coords_type, LLVMTypeRef rsrc_type, char *out_name, unsigned out_len); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 283a889..913b6c3 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -62,20 +62,26 @@ static void kil_emit(const struct lp_build_tgsi_action *action, LLVMValueRef visible; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) { visible = emit_data->args[0]; } else { assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL); visible = LLVMConstInt(ctx->i1, false, 0); } if (ctx->shader->selector->force_correct_derivs_after_kill) { + /* LLVM 6.0 can kill immediately while maintaining WQM. */ + if (HAVE_LLVM >= 0x0600) { + ac_build_kill_if_false(&ctx->ac, + ac_build_wqm_vote(&ctx->ac, visible)); + } + LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, ""); mask = LLVMBuildAnd(builder, mask, visible, ""); LLVMBuildStore(builder, mask, ctx->postponed_kill); return; } ac_build_kill_if_false(&ctx->ac, visible); } static void emit_icmp(const struct lp_build_tgsi_action *action, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev