Hi,
This patch implements vector lowering for MULT_EXPR. It is a much simplified
version of what expand_mult/vect_recog_mult_pattern do: shifts are used only
when multiplying by a constant that is an exact power of 2.
Bootstrapped and regtested on ppc64le with no regression failures.
This patch does resolve PR122065, but a better approach would be to reuse the
code in tree-vect-patterns.cc:vect_synth_mult_by_constant, which handles many
more cases than just powers of 2. However, statements are built differently in
pattern recognition (gimple_build_assign, on gimple) than in lowering
(gimplify_build2, on trees).
Are there any suggestions on how to reuse this function?
Thanks and regards,
Avinash Jayakar
Use similar logic for lowering the vector operation for MULT_EXPR as done in
expand_mult in expmed.cc.
Previously, if the source code was written in a vector dialect, for example
using the vector types of altivec.h, the vector lowering pass would lower the
MULT_EXPR to its scalar variant if the target did not support a vector insn for
that type. Better code can be generated by recognizing a multiplication by a
power of 2 and transforming it into a shift.
For example, this code
vector unsigned long long
lshift1_64_altivec (vector unsigned long long a)
{
return a * (vector unsigned long long) { 4, 4 };
}
generates the following scalar code on power8/9
.cfi_startproc
xxpermdi 0,34,34,3
mfvsrd 9,34
mfvsrd 10,0
sldi 9,9,2
mtvsrd 0,9
sldi 10,10,2
mtvsrd 34,10
xxpermdi 34,0,34,0
blr
.long 0
.byte 0,0,0,0,0,0,0,0
.cfi_endproc
although the target has a vector insn for left shift. With this change, the
following is generated instead
.cfi_startproc
lxvd2x 32,0,3
vspltisw 1,2
vsld 0,0,1
stxvd2x 32,0,3
blr
.long 0
.byte 0,0,0,0,0,0,0,0
.cfi_endproc
2025-11-14 Avinash Jayakar <[email protected]>
gcc/ChangeLog:
PR vect/122065
* tree-vect-generic.cc (add_rshift): Rename to...
(add_shift): ...this.  Add code parameter.
(expand_vector_mult): New lowering for MULT_EXPR.
(expand_vector_divmod): Use updated function name.
(expand_vector_operation): Use updated function name.
---
gcc/tree-vect-generic.cc | 70 +++++++++++++++++++++++++++++++---------
1 file changed, 54 insertions(+), 16 deletions(-)
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 3c68361870b..6d3572cf22c 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -460,7 +460,8 @@ expand_vector_comparison (gimple_stmt_iterator *gsi, tree
type, tree op0,
of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
the result if successful, otherwise return NULL_TREE. */
static tree
-add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
+add_shift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts,
+ enum tree_code code)
{
optab op;
unsigned int i, nunits = nunits_for_known_piecewise_op (type);
@@ -477,26 +478,59 @@ add_rshift (gimple_stmt_iterator *gsi, tree type, tree
op0, int *shiftcnts)
if (scalar_shift)
{
- op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
+ op = optab_for_tree_code (code, type, optab_scalar);
if (op != unknown_optab
&& can_implement_p (op, TYPE_MODE (type)))
- return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
+ return gimplify_build2 (gsi, code, type, op0,
build_int_cst (NULL_TREE, shiftcnts[0]));
}
- op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+ op = optab_for_tree_code (code, type, optab_vector);
if (op != unknown_optab
&& can_implement_p (op, TYPE_MODE (type)))
{
tree_vector_builder vec (type, nunits, 1);
for (i = 0; i < nunits; i++)
vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
- return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
+ return gimplify_build2 (gsi, code, type, op0, vec.build ());
}
return NULL_TREE;
}
+/* Try to expand integer vector multiplication of OP0 by the constant
+   vector OP1 as a single left shift.  This succeeds only when every
+   element of OP1 is the same positive power of two and add_shift finds
+   a usable shift optab for TYPE; otherwise NULL_TREE is returned.  */
+static tree
+expand_vector_mult (gimple_stmt_iterator *gsi, tree type, tree op0,
+		    tree op1)
+{
+  int prec = TYPE_PRECISION (TREE_TYPE (type));
+  unsigned int nunits = nunits_for_known_piecewise_op (type);
+
+  if (prec > HOST_BITS_PER_WIDE_INT)
+    return NULL_TREE;
+
+  /* One shift count per element is enough.  The optab checks are left
+     to add_shift, which tries the optab_scalar form when it can before
+     falling back to optab_vector, so neither is ruled out up front.  */
+  int *shifts = XALLOCAVEC (int, nunits);
+
+  for (unsigned int i = 0; i < nunits; i++)
+    {
+      tree cst = VECTOR_CST_ELT (op1, i);
+      if ((TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
+	  || !integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)
+	return NULL_TREE;
+
+      shifts[i] = tree_log2 (cst);
+      /* Only a uniform multiplier is handled.  */
+      if (shifts[i] != shifts[0])
+	return NULL_TREE;
+    }
+  return add_shift (gsi, type, op0, shifts, LSHIFT_EXPR);
+}
/* Try to expand integer vector division by constant using
widening multiply, shifts and additions. */
static tree
@@ -705,14 +739,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
type, tree op0,
{
for (i = 0; i < nunits; i++)
shift_temps[i] = prec - 1;
- cur_op = add_rshift (gsi, type, op0, shift_temps);
+ cur_op = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
if (cur_op != NULL_TREE)
{
cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
uns_type, cur_op);
for (i = 0; i < nunits; i++)
shift_temps[i] = prec - shifts[i];
- cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
+ cur_op = add_shift (gsi, uns_type, cur_op, shift_temps,
RSHIFT_EXPR);
if (cur_op != NULL_TREE)
addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
type, cur_op);
@@ -748,7 +782,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type,
tree op0,
if (sign_p == UNSIGNED)
{
/* q = op0 >> shift; */
- cur_op = add_rshift (gsi, type, op0, shifts);
+ cur_op = add_shift (gsi, type, op0, shifts, RSHIFT_EXPR);
if (cur_op != NULL_TREE)
return cur_op;
}
@@ -761,7 +795,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type,
tree op0,
&& can_implement_p (op, TYPE_MODE (type)))
{
cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
- cur_op = add_rshift (gsi, type, cur_op, shifts);
+ cur_op = add_shift (gsi, type, cur_op, shifts, RSHIFT_EXPR);
if (cur_op != NULL_TREE)
return cur_op;
}
@@ -823,7 +857,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type,
tree op0,
/* t1 = oprnd0 >> pre_shift;
t2 = t1 h* ml;
q = t2 >> post_shift; */
- cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
+ cur_op = add_shift (gsi, type, cur_op, pre_shifts, RSHIFT_EXPR);
if (cur_op == NULL_TREE)
return NULL_TREE;
break;
@@ -860,7 +894,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type,
tree op0,
/* t1 = oprnd0 >> pre_shift;
t2 = t1 h* ml;
q = t2 >> post_shift; */
- cur_op = add_rshift (gsi, type, cur_op, post_shifts);
+ cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
break;
case 1:
/* t1 = oprnd0 h* ml;
@@ -873,13 +907,13 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
type, tree op0,
|| !can_implement_p (op, TYPE_MODE (type)))
return NULL_TREE;
tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
- tem = add_rshift (gsi, type, tem, shift_temps);
+ tem = add_shift (gsi, type, tem, shift_temps, RSHIFT_EXPR);
op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
if (op == unknown_optab
|| !can_implement_p (op, TYPE_MODE (type)))
return NULL_TREE;
tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
- cur_op = add_rshift (gsi, type, tem, post_shifts);
+ cur_op = add_shift (gsi, type, tem, post_shifts, RSHIFT_EXPR);
if (cur_op == NULL_TREE)
return NULL_TREE;
break;
@@ -902,10 +936,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree
type, tree op0,
return NULL_TREE;
cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
}
- cur_op = add_rshift (gsi, type, cur_op, post_shifts);
+ cur_op = add_shift (gsi, type, cur_op, post_shifts, RSHIFT_EXPR);
if (cur_op == NULL_TREE)
return NULL_TREE;
- tem = add_rshift (gsi, type, op0, shift_temps);
+ tem = add_shift (gsi, type, op0, shift_temps, RSHIFT_EXPR);
if (tem == NULL_TREE)
return NULL_TREE;
op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
@@ -1130,6 +1164,7 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree
type, tree compute_type
case TRUNC_DIV_EXPR:
case TRUNC_MOD_EXPR:
+ case MULT_EXPR:
{
tree rhs1 = gimple_assign_rhs1 (assign);
tree rhs2 = gimple_assign_rhs2 (assign);
@@ -1141,7 +1176,10 @@ expand_vector_operation (gimple_stmt_iterator *gsi, tree
type, tree compute_type
|| !VECTOR_MODE_P (TYPE_MODE (type)))
break;
- ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
+ if (code == MULT_EXPR)
+ ret = expand_vector_mult (gsi, type, rhs1, rhs2);
+ else
+ ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
if (ret != NULL_TREE)
return ret;
break;
--
2.51.0