> > +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > index
> 02e194ae06f34957194c4e4f2eb4fdb3ef72d2f5..aa12221a2b2b584fa10fe3
> 78e16115128408ee3e 100644
> > --- a/gcc/tree-ssa-math-opts.cc
> > +++ b/gcc/tree-ssa-math-opts.cc
> > @@ -3120,6 +3120,30 @@ convert_mult_to_fma_1 (tree mul_result, tree
> op1, tree op2)
> > if (is_gimple_debug (use_stmt))
> > continue;
> >
> > + /* If the use is a type convert, look further into it if the
> > operations
> > + are the same under two's complement. */
> > + tree lhs_type;
> > + if (gimple_assign_cast_p (use_stmt)
> > + && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
> > + && TREE_CODE (lhs_type) == TREE_CODE (TREE_TYPE (op1))
>
> strict equality is going to be brittle, what are you trying to protect against
> with this?
>
Without that check the code was capturing some conversions like (int)bool. But...
> > + && (TYPE_UNSIGNED (lhs_type)
> > + || (ANY_INTEGRAL_TYPE_P (lhs_type)
> > + && !TYPE_OVERFLOW_WRAPS (lhs_type)))
> > + && (element_precision (lhs_type)
> > + == element_precision (gimple_assign_rhs1 (use_stmt))))
>
> I think that you want to simplify this to tree_nop_conversion_p and make
> sure to perform the FMA in a wrapping type if you looked through one - that
> would also allow the reverse sign conversion case.
I hadn't come across tree_nop_conversion_p before; that is indeed much cleaner.
>
> > + {
> > + tree cast_lhs = gimple_get_lhs (use_stmt);
> > + gimple *tmp_use;
> > + use_operand_p tmp_use_p;
> > + if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
> > + {
> > + use_stmt = tmp_use;
> > + result = cast_lhs;
> > + gsi_remove (&gsi, true);
>
> release_defs missing?
>
> > + gsi = gsi_for_stmt (use_stmt);
> > + }
> > + }
> > +
> > if (is_gimple_assign (use_stmt)
> > && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
> > {
> > @@ -3156,6 +3180,11 @@ convert_mult_to_fma_1 (tree mul_result, tree
> op1, tree op2)
> > if (negate_p)
> > mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
> >
> > + /* Ensure all the operands are of the same type Use the type of the
> > + addend as that's the statement being replaced. */
> > + op2 = gimple_convert (&seq, TREE_TYPE (addop), op2);
> > + mulop1 = gimple_convert (&seq, TREE_TYPE (addop), mulop1);
> > +
>
> In your code example I see back-and-forth conversion because of the use of
> gimple_convert with a 'seq' - if we'd use the 'gsi' overloads that would be
> avoided by also match-and-simplifying with other stmts in the IL.
>
Ack. I had expected the fold_stmt call on the final FMA to take care of it, but
I have now changed it to use the gsi variant.
Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/122749
* tree-ssa-math-opts.cc (convert_mult_to_fma_1, convert_mult_to_fma):
Unwrap converts around addend.
gcc/testsuite/ChangeLog:
PR tree-optimization/122749
* gcc.target/aarch64/pr122749_1.c: New test.
* gcc.target/aarch64/pr122749_2.c: New test.
* gcc.target/aarch64/pr122749_3.c: New test.
* gcc.target/aarch64/pr122749_4.c: New test.
* gcc.target/aarch64/pr122749_5.c: New test.
* gcc.target/aarch64/pr122749_6.c: New test.
* gcc.target/aarch64/pr122749_8.c: New test.
* gcc.target/aarch64/pr122749_9.c: New test.
* gcc.target/aarch64/sve/pr122749_1.c: New test.
* gcc.target/aarch64/sve/pr122749_11.c: New test.
* gcc.target/aarch64/sve/pr122749_12.c: New test.
* gcc.target/aarch64/sve/pr122749_13.c: New test.
* gcc.target/aarch64/sve/pr122749_14.c: New test.
* gcc.target/aarch64/sve/pr122749_2.c: New test.
* gcc.target/aarch64/sve/pr122749_3.c: New test.
* gcc.target/aarch64/sve/pr122749_4.c: New test.
* gcc.target/aarch64/sve/pr122749_5.c: New test.
* gcc.target/aarch64/sve/pr122749_6.c: New test.
* gcc.target/aarch64/sve/pr122749_8.c: New test.
* gcc.target/aarch64/sve/pr122749_9.c: New test.
-- inline copy of patch --
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
new file mode 100644
index
0000000000000000000000000000000000000000..25311fce4e3a79b389cbb750231c1277ccaf0611
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int8_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-* }
} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
new file mode 100644
index
0000000000000000000000000000000000000000..f4a70a611176893e9fa55d8bc1826805ed5d966d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int16_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-* }
} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
new file mode 100644
index
0000000000000000000000000000000000000000..61bcd30be2b47f482e8b3f0a024b2a1d51c4fda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int32_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-* }
} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
new file mode 100644
index
0000000000000000000000000000000000000000..6089716b0ca7498f9b8089f1b72d2968b1c2ee76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
new file mode 100644
index
0000000000000000000000000000000000000000..562dc5be861762272ea8d23b8304e1abb439e20f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 2 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
new file mode 100644
index
0000000000000000000000000000000000000000..3e51c5e22a18a9a3acd2416c3ba72496c9621adf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fwrapv -fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
new file mode 100644
index
0000000000000000000000000000000000000000..6aa729c13d1616273d579077253d3fcdf55cc555
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint8_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-* }
} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
new file mode 100644
index
0000000000000000000000000000000000000000..d987a9936afb2cb4ba19e62736fa4ed171669e25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target arm_v8_neon_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 --param vect-epilogues-nomask=0
-fdump-tree-vect-details -fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint16_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-* }
} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
new file mode 100644
index
0000000000000000000000000000000000000000..32a36461fbc7bb78048ae68c8dc0bdd81b11a2cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int8_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
new file mode 100644
index
0000000000000000000000000000000000000000..bd160dd0ebf515a3ff3ddd1969303aabf8c03aea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint8_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
new file mode 100644
index
0000000000000000000000000000000000000000..8f0198ce42600b0fe92bf483123ad1cb71ff9f24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint16_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
new file mode 100644
index
0000000000000000000000000000000000000000..218afde13984fc64755d3c4567a05a33b5485411
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint32_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT32_MAX, 7, 0, UINT32_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
new file mode 100644
index
0000000000000000000000000000000000000000..1587628757e28f66dfd515e191ef04331c549434
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef uint64_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, 2, UINT64_MAX, 7, 0, UINT64_MAX, 5, 9 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
new file mode 100644
index
0000000000000000000000000000000000000000..0f5918a9023521b06ac20ef922b025dc6a1e8f01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int16_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
new file mode 100644
index
0000000000000000000000000000000000000000..92548cb6ec4fdc4a3d133669fb914c5ab9a103ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int32_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
new file mode 100644
index
0000000000000000000000000000000000000000..6085a18bab7f2ae0e5855a982e186f831705bf52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+#include <limits.h>
+#include <stdint.h>
+
+typedef int64_t elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1, -2, INT64_MAX, INT64_MIN, 5, -7, 3, -4 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
new file mode 100644
index
0000000000000000000000000000000000000000..d61b91bb06dc0a035bd6adfabccc580eac7f78a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
new file mode 100644
index
0000000000000000000000000000000000000000..7598f7a28bcf1745ce672c0bab22fec0fda37a3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
new file mode 100644
index
0000000000000000000000000000000000000000..e1c337d44ead96d868d71f0ae54960f2189e499e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+typedef float elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
new file mode 100644
index
0000000000000000000000000000000000000000..13d962e2130f986910f1a94489e4014761e917b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv -fdump-tree-vect-details
-fdump-tree-widening_mul" } */
+
+typedef double elem_t;
+
+__attribute__ ((noipa))
+elem_t
+foo2 (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+static elem_t
+reference (elem_t *buf, int len)
+{
+ elem_t x = 0;
+
+#pragma GCC novector
+ for (int i = 0; i < len; i++)
+ x += (elem_t) i * buf[i];
+
+ return x;
+}
+
+int
+main (void)
+{
+ elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
+ int len = sizeof (buf) / sizeof (buf[0]);
+ elem_t want = reference (buf, len);
+ elem_t got = foo2 (buf, len);
+
+ if (want != got)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index
4c3fb0f4fc5313199357d19ab809a7d8d88ed2d6..4b50a96ad3aa19857c5b8436ee8d6d3080d3c9ed
100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3120,6 +3120,26 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree
op2)
if (is_gimple_debug (use_stmt))
continue;
+ /* If the use is a type convert, look further into it if the operations
+ are the same under two's complement. */
+ tree lhs_type;
+ if (gimple_assign_cast_p (use_stmt)
+ && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
+ && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
+ {
+ tree cast_lhs = gimple_get_lhs (use_stmt);
+ gimple *tmp_use;
+ use_operand_p tmp_use_p;
+ if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
+ {
+ release_defs (use_stmt);
+ use_stmt = tmp_use;
+ result = cast_lhs;
+ gsi_remove (&gsi, true);
+ gsi = gsi_for_stmt (use_stmt);
+ }
+ }
+
if (is_gimple_assign (use_stmt)
&& gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
{
@@ -3159,6 +3179,13 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree
op2)
if (seq)
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
+ /* Ensure all the operands are of the same type. Use the type of the
+ addend as that's the statement being replaced. */
+ op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
+ UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
+ mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
+ UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
+
if (len)
fma_stmt
= gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
@@ -3419,6 +3446,20 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree
op2,
if (is_gimple_debug (use_stmt))
continue;
+ /* If the use is a type convert, look further into it if the operations
+ are the same under two's complement. */
+ tree lhs_type;
+ if (gimple_assign_cast_p (use_stmt)
+ && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
+ && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
+ {
+ tree cast_lhs = gimple_get_lhs (use_stmt);
+ gimple *tmp_use;
+ use_operand_p tmp_use_p;
+ if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
+ use_stmt = tmp_use;
+ }
+
/* For now restrict this operations to single basic blocks. In theory
we would want to support sinking the multiplication in
m = a*b;
rb20199.patch
Description: rb20199.patch
