Hi,
This implements mask reductions by first counting the bits in the mask
(vcpop.m) and then comparing the resulting scalar against 0 or len.
Regtested on rv64gcv_zvl512b.
Regards
Robin
gcc/ChangeLog:
* config/riscv/autovec.md (reduc_sbool_and_scal_<mode>): New
expander.
(reduc_sbool_ior_scal_<mode>): Ditto.
(reduc_sbool_xor_scal_<mode>): Ditto.
* config/riscv/riscv-protos.h (expand_mask_reduction): Declare.
* config/riscv/riscv-v.cc (expand_mask_reduction): New function.
---
gcc/config/riscv/autovec.md | 31 +++++++++++++++++++++
gcc/config/riscv/riscv-protos.h | 1 +
gcc/config/riscv/riscv-v.cc | 48 +++++++++++++++++++++++++++++++++
3 files changed, 80 insertions(+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 48de5efdde5..e69a3a33db1 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2300,6 +2300,37 @@ (define_expand "reduc_xor_scal_<mode>"
DONE;
})
+;; -------------------------------------------------------------------------
+;; ---- [INT] Mask reductions
+;; -------------------------------------------------------------------------
+
+(define_expand "reduc_sbool_and_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, AND);
+ DONE;
+})
+
+(define_expand "reduc_sbool_ior_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, IOR);
+ DONE;
+})
+
+(define_expand "reduc_sbool_xor_scal_<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:VB_VLS 1 "register_operand")]
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_mask_reduction (operands, XOR);
+ DONE;
+})
+
;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 013b1ddff69..c56b95280d0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -658,6 +658,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
+void expand_mask_reduction (rtx *, rtx_code);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 707924db6a3..7ca183cde22 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4946,6 +4946,54 @@ expand_reduction (unsigned unspec, unsigned
unspec_for_vl0_safe,
emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2));
}
+/* Expand mask reductions. OPS are {dest, src} where DEST's mode
+ is QImode and SRC's mode is a mask mode.
+ CODE is one of AND, IOR, XOR. */
+
+void
+expand_mask_reduction (rtx *ops, rtx_code code)
+{
+ machine_mode mode = GET_MODE (ops[1]);
+ rtx dest = ops[0];
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ rtx tmp = gen_reg_rtx (Xmode);
+ rtx cpop_ops[] = {tmp, ops[1]};
+ emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops);
+
+ bool eq_zero = false;
+
+ /* AND reduction is popcount (mask) == len,
+ IOR reduction is popcount (mask) != 0,
+ XOR reduction is popcount (mask) & 1 != 0. */
+ if (code == AND)
+ {
+ rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode);
+ tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true,
+ OPTAB_DIRECT);
+ eq_zero = true;
+ }
+ else if (code == IOR)
+ ;
+ else if (code == XOR)
+ tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true,
+ OPTAB_DIRECT);
+ else
+ gcc_unreachable ();
+
+ rtx els = gen_label_rtx ();
+ rtx end = gen_label_rtx ();
+
+ riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx);
+ emit_move_insn (dest, const0_rtx);
+ emit_jump_insn (gen_jump (end));
+ emit_barrier ();
+
+ emit_label (els);
+ emit_move_insn (dest, const1_rtx);
+ emit_label (end);
+}
+
/* Prepare ops for ternary operations.
It can be called before or after RA. */
void
--
2.51.0