The following adds named patterns for reducing vector masks with
AND, IOR and XOR, to be used by the vectorizer. A slight complication
is posed by targets using scalar integer modes as mask modes, as for
those the mode for low-precision masks is ambiguous. For this reason the
optab follows what vec_pack_sbool_trunc does and passes an additional
CONST_INT operand indicating the number of lanes in the input mask.
Note that this is always done when the vector mask mode is an integer
mode and never otherwise.
Note [1/3] of the previous series is unchanged, I have not reposted it.
I'll merge that and [2/2] of this series and play with the patterns
Hongtao provided in the bugzilla for x86.
* doc/md.texi (reduc_sbool_{and,ior,xor}_scal_<mode>): Document.
* optabs.def (reduc_sbool_and_scal_optab,
reduc_sbool_ior_scal_optab, reduc_sbool_xor_scal_optab): New.
* internal-fn.def (REDUC_SBOOL_AND, REDUC_SBOOL_IOR,
REDUC_SBOOL_XOR): Likewise.
* internal-fn.cc (reduc_sbool_direct): New initializer.
(expand_reduc_sbool_optab_fn): New expander.
(direct_reduc_sbool_optab_supported_p): New.
---
gcc/doc/md.texi | 13 +++++++++++++
gcc/internal-fn.cc | 35 +++++++++++++++++++++++++++++++++++
gcc/internal-fn.def | 7 +++++++
gcc/optabs.def | 3 +++
4 files changed, 58 insertions(+)
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 44e1149bea8..aa0141314cf 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5736,6 +5736,19 @@ of a vector of mode @var{m}. Operand 1 is the vector
input and operand 0
is the scalar result. The mode of the scalar result is the same as one
element of @var{m}.
+@cindex @code{reduc_sbool_and_scal_@var{m}} instruction pattern
+@cindex @code{reduc_sbool_ior_scal_@var{m}} instruction pattern
+@cindex @code{reduc_sbool_xor_scal_@var{m}} instruction pattern
+@item @samp{reduc_sbool_and_scal_@var{m}}
+@itemx @samp{reduc_sbool_ior_scal_@var{m}}
+@itemx @samp{reduc_sbool_xor_scal_@var{m}}
+Compute the bitwise @code{AND}/@code{IOR}/@code{XOR} reduction of the elements
+of a vector boolean of mode @var{m}. Operand 1 is the vector input and
+operand 0 is the scalar result. The mode of the scalar result is @code{QImode}
+and its value is either zero or one. If mode @var{m} is a scalar integer mode
+then operand 2 is the number of elements in the input vector, which
+disambiguates the case where @var{m} alone is ambiguous.
+
@cindex @code{extract_last_@var{m}} instruction pattern
@item @code{extract_last_@var{m}}
Find the last set bit in mask operand 1 and extract the associated element
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index fb76e64e53e..cf9f0c66e0c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -194,6 +194,7 @@ init_internal_fns ()
#define mask_len_fold_left_direct { 1, 1, false }
#define check_ptrs_direct { 0, 0, false }
#define crc_direct { 1, -1, true }
+#define reduc_sbool_direct { 0, 0, true }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -4099,6 +4100,39 @@ expand_crc_optab_fn (internal_fn fn, gcall *stmt,
convert_optab optab)
}
}
+/* Expand the .REDUC_SBOOL_{AND,IOR,XOR} call STMT for internal function FN using OPTAB. */
+
+static void
+expand_reduc_sbool_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab)
+{
+ tree_pair types = direct_internal_fn_types (fn, stmt);
+ insn_code icode = direct_optab_handler (optab, TYPE_MODE (types.first));
+
+ /* Below copied from expand_fn_using_insn. */
+
+ gcc_assert (icode != CODE_FOR_nothing);
+
+ expand_operand *ops = XALLOCAVEC (expand_operand, 3);
+ rtx lhs_rtx = NULL_RTX;
+ tree lhs = gimple_call_lhs (stmt);
+ if (lhs)
+ lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ create_call_lhs_operand (&ops[0], lhs_rtx,
+ insn_data[icode].operand[0].mode);
+ /* Operand 1 is the vector boolean argument; a scalar integer mask mode also gets its element count as operand 2. */
+ tree rhs = gimple_call_arg (stmt, 0);
+ tree rhs_type = TREE_TYPE (rhs);
+ rtx rhs_rtx = expand_normal (rhs);
+ gcc_assert (VECTOR_BOOLEAN_TYPE_P (rhs_type));
+ create_input_operand (&ops[1], rhs_rtx, TYPE_MODE (rhs_type));
+ if (SCALAR_INT_MODE_P (TYPE_MODE (rhs_type)))
+ create_input_operand (&ops[2], GEN_INT (TYPE_VECTOR_SUBPARTS
+ (rhs_type).to_constant ()), SImode);
+ expand_insn (icode, SCALAR_INT_MODE_P (TYPE_MODE (rhs_type)) ? 3 : 2, ops);
+ if (lhs_rtx)
+ assign_call_lhs (lhs, lhs_rtx, &ops[0]);
+}
+
/* Expanders for optabs that can use expand_direct_optab_fn. */
#define expand_unary_optab_fn(FN, STMT, OPTAB) \
@@ -4261,6 +4295,7 @@ multi_vector_optab_supported_p (convert_optab optab,
tree_pair types,
#define direct_check_ptrs_optab_supported_p direct_optab_supported_p
#define direct_vec_set_optab_supported_p direct_optab_supported_p
#define direct_vec_extract_optab_supported_p convert_optab_supported_p
+#define direct_reduc_sbool_optab_supported_p direct_optab_supported_p
/* Return the optab used by internal function FN. */
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 8434a805e28..7874fcfb3df 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -330,6 +330,13 @@ DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
reduc_ior_scal, unary)
DEF_INTERNAL_OPTAB_FN (REDUC_XOR, ECF_CONST | ECF_NOTHROW,
reduc_xor_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_AND, ECF_CONST | ECF_NOTHROW,
+ reduc_sbool_and_scal, reduc_sbool)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_IOR, ECF_CONST | ECF_NOTHROW,
+ reduc_sbool_ior_scal, reduc_sbool)
+DEF_INTERNAL_OPTAB_FN (REDUC_SBOOL_XOR, ECF_CONST | ECF_NOTHROW,
+ reduc_sbool_xor_scal, reduc_sbool)
+
/* Extract the last active element from a vector. */
DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 790e43f08f4..d13e0b5b848 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -401,6 +401,9 @@ OPTAB_D (reduc_umin_scal_optab, "reduc_umin_scal_$a")
OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a")
OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")
+OPTAB_D (reduc_sbool_and_scal_optab, "reduc_sbool_and_scal_$a")
+OPTAB_D (reduc_sbool_ior_scal_optab, "reduc_sbool_ior_scal_$a")
+OPTAB_D (reduc_sbool_xor_scal_optab, "reduc_sbool_xor_scal_$a")
OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
--
2.51.0