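Implement the inclusive and exclusive scan helpers for chips older than VI
using ds_swizzle and readlane; VI and newer chips keep the existing DPP path.

This functionality is currently disabled due to a bug in LLVM.
The idea behind this implementation is taken from the ROCm Device Libs:
https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/master/ockl/src/wfredscan.cl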
---
 src/amd/common/ac_llvm_build.c | 98 ++++++++++++++++++++++++++++------
 1 file changed, 82 insertions(+), 16 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index c9b2e36b63..8300c9d6bd 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -2967,26 +2967,66 @@ ac_build_alu_op(struct ac_llvm_context *ctx, 
LLVMValueRef lhs, LLVMValueRef rhs,
        }
 }
 
-/* TODO: add inclusive and excluse scan functions for SI chip class.  */
 static LLVMValueRef
 ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, 
LLVMValueRef identity)
 {
        LLVMValueRef result, tmp;
        result = src;
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, 
false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, 
false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, 
false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
-       tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, 
false);
-       result = ac_build_alu_op(ctx, result, tmp, op);
+       if (ctx->chip_class >= VI) {
+               tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, 
false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, 
false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, 
false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 
0xe, false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 
0xc, false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 
0xf, false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 
0xf, false);
+               result = ac_build_alu_op(ctx, result, tmp, op);
+       } else {
+               LLVMValueRef tid = ac_get_thread_id(ctx);
+               LLVMValueRef active;
+               tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 
0x00, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
ctx->i32_1, ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 
0x01, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 2, 0), ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 
0x03, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 4, 0), ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 
0x07, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 8, 0), ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 
0x0f, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 16, 0), ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+               tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 
0));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 32, 0), ""),
+                                      ctx->i32_0, "");
+               tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
+               result = ac_build_alu_op(ctx, result, tmp, op);
+       }
        return result;
 }
 
@@ -3015,7 +3055,33 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, 
LLVMValueRef src, nir_op op
        result = LLVMBuildBitCast(ctx->builder,
                                                                
ac_build_set_inactive(ctx, src, identity),
                                                                
LLVMTypeOf(identity), "");
-       result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, 
false);
+       if (ctx->chip_class >= VI) {
+               result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 
0xf, false);
+       } else {
+               LLVMValueRef active, tmp1, tmp2;
+               LLVMValueRef tid = ac_get_thread_id(ctx);
+               tmp1 = ac_build_ds_swizzle(ctx, result, (1 << 15) | 
dpp_quad_perm(0, 0, 1, 2));
+               tmp2 = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x18, 0x03, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 0x7, 0), ""),
+                                      LLVMConstInt(ctx->i32, 0x4, 0), "");
+               tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+               tmp2 = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x10, 0x07, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 0xf, 0), ""),
+                                      LLVMConstInt(ctx->i32, 0x8, 0), "");
+               tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+               tmp2 = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x00, 0x0f, 0x00));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+                                      LLVMBuildAnd(ctx->builder, tid, 
LLVMConstInt(ctx->i32, 0x1f, 0), ""),
+                                      LLVMConstInt(ctx->i32, 0x10, 0), "");
+               tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+               tmp2 = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 
31, 0));
+               active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, 
LLVMConstInt(ctx->i32, 32, 0), "");
+               tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
+               active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, 
LLVMConstInt(ctx->i32, 0, 0), "");
+               result = LLVMBuildSelect(ctx->builder, active, identity, tmp1, 
"");
+       }
        result = ac_build_scan(ctx, op, result, identity);
 
        return ac_build_wwm(ctx, result);
-- 
2.17.0

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to