I'm a bit late, but here are some stylistic nitpicks for the future:

As a general stylistic note, prefer `false` rather than `0` as the last argument to LLVMConstInt, since that parameter is a boolean (whether to sign-extend).

[snip]
+{
+       ac_build_optimization_barrier(ctx, &src);
+       LLVMValueRef result;
+       LLVMValueRef identity = get_reduction_identity(ctx, op,
+                                                               
ac_get_type_size(LLVMTypeOf(src)));
+       result = LLVMBuildBitCast(ctx->builder,
+                                                               
ac_build_set_inactive(ctx, src, identity),
+                                                               LLVMTypeOf(identity), 
"");

Weird whitespace; are you using a non-default tab width?


+       result = ac_build_scan(ctx, op, result, identity);
+
+       return ac_build_wwm(ctx, result);
+}
+
+LLVMValueRef
+ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op 
op)
+{
+       ac_build_optimization_barrier(ctx, &src);
+       LLVMValueRef result;
+       LLVMValueRef identity = get_reduction_identity(ctx, op,
+                                                               
ac_get_type_size(LLVMTypeOf(src)));
+       result = LLVMBuildBitCast(ctx->builder,
+                                                               
ac_build_set_inactive(ctx, src, identity),
+                                                               LLVMTypeOf(identity), 
"");
+       result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, 
false);
+       result = ac_build_scan(ctx, op, result, identity);
+
+       return ac_build_wwm(ctx, result);
+}
+
+LLVMValueRef
+ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, 
unsigned cluster_size)
+{
+       if (cluster_size == 1) return src;

Prefer to put the return on a new line, please.

Thanks,
Nicolai


+       ac_build_optimization_barrier(ctx, &src);
+       LLVMValueRef result, swap;
+       LLVMValueRef identity = get_reduction_identity(ctx, op,
+                                                               
ac_get_type_size(LLVMTypeOf(src)));
+       result = LLVMBuildBitCast(ctx->builder,
+                                                               
ac_build_set_inactive(ctx, src, identity),
+                                                               LLVMTypeOf(identity), 
"");

Weird whitespace again.

With those style nitpicks fixed,

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>


+       swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
+       result = ac_build_alu_op(ctx, result, swap, op);
+       if (cluster_size == 2) return ac_build_wwm(ctx, result);
+
+       swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
+       result = ac_build_alu_op(ctx, result, swap, op);
+       if (cluster_size == 4) return ac_build_wwm(ctx, result);
+
+       if (ctx->chip_class >= VI)
+               swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 
0xf, 0xf, false);
+       else
+               swap = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x1f, 0, 0x04));
+       result = ac_build_alu_op(ctx, result, swap, op);
+       if (cluster_size == 8) return ac_build_wwm(ctx, result);
+
+       if (ctx->chip_class >= VI)
+               swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 
0xf, false);
+       else
+               swap = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x1f, 0, 0x08));
+       result = ac_build_alu_op(ctx, result, swap, op);
+       if (cluster_size == 16) return ac_build_wwm(ctx, result);
+
+       if (ctx->chip_class >= VI && cluster_size != 32)
+               swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 
0xa, 0xf, false);
+       else
+               swap = ac_build_ds_swizzle(ctx, result, 
ds_pattern_bitmode(0x1f, 0, 0x10));
+       result = ac_build_alu_op(ctx, result, swap, op);
+       if (cluster_size == 32) return ac_build_wwm(ctx, result);
+
+       if (ctx->chip_class >= VI) {
+               swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 
0xc, 0xf, false);
+               result = ac_build_alu_op(ctx, result, swap, op);
+               result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 
63, 0));
+               return ac_build_wwm(ctx, result);
+       } else {
+               swap = ac_build_readlane(ctx, result, ctx->i32_0);
+               result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 
32, 0));
+               result = ac_build_alu_op(ctx, result, swap, op);
+               return ac_build_wwm(ctx, result);
+       }
+}
+
+LLVMValueRef
+ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+               unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
+{
+       unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
+       if (ctx->chip_class >= VI && HAVE_LLVM >= 0x0600) {
+               return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
+       } else {
+               return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
+       }
+}
+
+LLVMValueRef
+ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
index)
+{
+       index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), 
"");
+       return ac_build_intrinsic(ctx,
+                 "llvm.amdgcn.ds.bpermute", ctx->i32,
+                 (LLVMValueRef []) {index, src}, 2,
+                 AC_FUNC_ATTR_READNONE |
+                 AC_FUNC_ATTR_CONVERGENT);
+}
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 8b35028a31..44811ce6d9 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -27,7 +27,7 @@
#include <stdbool.h>
  #include <llvm-c/TargetMachine.h>
-
+#include <nir/nir.h>
  #include "amd_family.h"
#ifdef __cplusplus
@@ -417,6 +417,34 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, 
LLVMValueRef param,
  void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
                              LLVMValueRef *addr, bool is_array_tex);
+LLVMValueRef
+ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned 
mask);
+
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
lane);
+
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
value, LLVMValueRef lane);
+
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
+
+LLVMValueRef
+ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op 
op);
+
+LLVMValueRef
+ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op 
op);
+
+LLVMValueRef
+ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, 
unsigned cluster_size);
+
+LLVMValueRef
+ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+               unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
+
+LLVMValueRef
+ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef 
index);
+
  #ifdef __cplusplus
  }
  #endif



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to