From: Marek Olšák <marek.ol...@amd.com>

This restores performance for the drirc workaround, i.e.
KILL_IF does:
   visible = src0 >= 0;
   kill_flag &= visible; // accumulate kills
   amdgcn_kill(wqm_vote(visible)); // kill fully dead quads only

And all helper pixels are killed at the end of the shader:
   amdgcn_kill(kill_flag);
---
 src/amd/common/ac_llvm_build.c                    | 7 +++++++
 src/amd/common/ac_llvm_build.h                    | 1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 6 ++++++
 3 files changed, 14 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 0550f80..2f8f59f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1398,20 +1398,27 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                           v2f16, args, 2,
                                           AC_FUNC_ATTR_READNONE);
                return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
        }
 
        return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
                                  AC_FUNC_ATTR_READNONE |
                                  AC_FUNC_ATTR_LEGACY);
 }
 
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+       assert(HAVE_LLVM >= 0x0600);
+       return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
+                                 &i1, 1, AC_FUNC_ATTR_READNONE);
+}
+
 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
        if (HAVE_LLVM >= 0x0600) {
                ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
                                   &i1, 1, 0);
                return;
        }
 
        LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
                                             LLVMConstReal(ctx->f32, 1),
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index b721782..088a986 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -258,20 +258,21 @@ struct ac_image_args {
        LLVMValueRef addr;
        unsigned dmask;
        bool unorm;
        bool da;
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                                   struct ac_image_args *a);
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                    LLVMValueRef args[2]);
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
                          LLVMValueRef offset, LLVMValueRef width,
                          bool is_signed);
 
 void ac_get_image_intr_name(const char *base_name,
                            LLVMTypeRef data_type,
                            LLVMTypeRef coords_type,
                            LLVMTypeRef rsrc_type,
                            char *out_name, unsigned out_len);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 283a889..913b6c3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -62,20 +62,26 @@ static void kil_emit(const struct lp_build_tgsi_action *action,
        LLVMValueRef visible;
 
        if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) {
                visible = emit_data->args[0];
        } else {
                assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL);
                visible = LLVMConstInt(ctx->i1, false, 0);
        }
 
        if (ctx->shader->selector->force_correct_derivs_after_kill) {
+               /* LLVM 6.0 can kill immediately while maintaining WQM. */
+               if (HAVE_LLVM >= 0x0600) {
+                       ac_build_kill_if_false(&ctx->ac,
+                                              ac_build_wqm_vote(&ctx->ac, visible));
+               }
+
                 LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, "");
                mask = LLVMBuildAnd(builder, mask, visible, "");
                LLVMBuildStore(builder, mask, ctx->postponed_kill);
                return;
        }
 
        ac_build_kill_if_false(&ctx->ac, visible);
 }
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to