On Tue, 27 Jan 2026, Tamar Christina wrote:
> > > +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> > > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> > > index
> > 02e194ae06f34957194c4e4f2eb4fdb3ef72d2f5..aa12221a2b2b584fa10fe3
> > 78e16115128408ee3e 100644
> > > --- a/gcc/tree-ssa-math-opts.cc
> > > +++ b/gcc/tree-ssa-math-opts.cc
> > > @@ -3120,6 +3120,30 @@ convert_mult_to_fma_1 (tree mul_result, tree
> > op1, tree op2)
> > > if (is_gimple_debug (use_stmt))
> > > continue;
> > >
> > > + /* If the use is a type convert, look further into it if the
> > > operations
> > > + are the same under two's complement. */
> > > + tree lhs_type;
> > > + if (gimple_assign_cast_p (use_stmt)
> > > + && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
> > > + && TREE_CODE (lhs_type) == TREE_CODE (TREE_TYPE (op1))
> >
> > Strict equality is going to be brittle; what are you trying to protect
> > against with this?
> >
>
> It was capturing some conversions like (int)bool. But..
>
> > > + && (TYPE_UNSIGNED (lhs_type)
> > > + || (ANY_INTEGRAL_TYPE_P (lhs_type)
> > > + && !TYPE_OVERFLOW_WRAPS (lhs_type)))
> > > + && (element_precision (lhs_type)
> > > + == element_precision (gimple_assign_rhs1 (use_stmt))))
> >
> > I think that you want to simplify this to tree_nop_conversion_p and make
> > sure to perform the FMA in a wrapping type if you looked through one - that
> > would also allow the reverse sign conversion case.
>
> I hadn't found tree_nop_conversion_p before and that's indeed much cleaner.
>
> >
> > > + {
> > > + tree cast_lhs = gimple_get_lhs (use_stmt);
> > > + gimple *tmp_use;
> > > + use_operand_p tmp_use_p;
> > > + if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
> > > + {
> > > + use_stmt = tmp_use;
> > > + result = cast_lhs;
> > > + gsi_remove (&gsi, true);
> >
> > release_defs missing?
> >
> > > + gsi = gsi_for_stmt (use_stmt);
> > > + }
> > > + }
> > > +
> > > if (is_gimple_assign (use_stmt)
> > > && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
> > > {
> > > @@ -3156,6 +3180,11 @@ convert_mult_to_fma_1 (tree mul_result, tree
> > op1, tree op2)
> > > if (negate_p)
> > > mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
> > >
> > > + /* Ensure all the operands are of the same type Use the type of the
> > > + addend as that's the statement being replaced. */
> > > + op2 = gimple_convert (&seq, TREE_TYPE (addop), op2);
> > > + mulop1 = gimple_convert (&seq, TREE_TYPE (addop), mulop1);
> > > +
> >
> > In your code example I see back-and-forth conversion because of the use of
> > gimple_convert with a 'seq' - if we'd use the 'gsi' overloads that would be
> > avoided by also match-and-simplifying with other stmts in the IL.
> >
>
> Ack. I had expected the fold_stmt call on the final FMA to take care of it,
> but I have changed to the gsi variant.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 with no issues.
>
> Ok for master?
OK.
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/122749
> * tree-ssa-math-opts.cc (convert_mult_to_fma_1, convert_mult_to_fma):
> Unwrap converts around addend.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/122749
> * gcc.target/aarch64/pr122749_1.c: New test.
> * gcc.target/aarch64/pr122749_2.c: New test.
> * gcc.target/aarch64/pr122749_3.c: New test.
> * gcc.target/aarch64/pr122749_4.c: New test.
> * gcc.target/aarch64/pr122749_5.c: New test.
> * gcc.target/aarch64/pr122749_6.c: New test.
> * gcc.target/aarch64/pr122749_8.c: New test.
> * gcc.target/aarch64/pr122749_9.c: New test.
> * gcc.target/aarch64/sve/pr122749_1.c: New test.
> * gcc.target/aarch64/sve/pr122749_11.c: New test.
> * gcc.target/aarch64/sve/pr122749_12.c: New test.
> * gcc.target/aarch64/sve/pr122749_13.c: New test.
> * gcc.target/aarch64/sve/pr122749_14.c: New test.
> * gcc.target/aarch64/sve/pr122749_2.c: New test.
> * gcc.target/aarch64/sve/pr122749_3.c: New test.
> * gcc.target/aarch64/sve/pr122749_4.c: New test.
> * gcc.target/aarch64/sve/pr122749_5.c: New test.
> * gcc.target/aarch64/sve/pr122749_6.c: New test.
> * gcc.target/aarch64/sve/pr122749_8.c: New test.
> * gcc.target/aarch64/sve/pr122749_9.c: New test.
>
> -- inline copy of patch --
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..25311fce4e3a79b389cbb750231c1277ccaf0611
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_1.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int8_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-*
> } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..f4a70a611176893e9fa55d8bc1826805ed5d966d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_2.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int16_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-*
> } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..61bcd30be2b47f482e8b3f0a024b2a1d51c4fda7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_3.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int32_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-*
> } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..6089716b0ca7498f9b8089f1b72d2968b1c2ee76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_4.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +typedef float elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..562dc5be861762272ea8d23b8304e1abb439e20f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_5.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +typedef double elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.FMA" 2 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..3e51c5e22a18a9a3acd2416c3ba72496c9621adf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_6.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fwrapv -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +typedef float elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.FMA" 4 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..6aa729c13d1616273d579077253d3fcdf55cc555
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_8.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint8_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-*
> } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
> b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..d987a9936afb2cb4ba19e62736fa4ed171669e25
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr122749_9.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target arm_v8_neon_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 --param
> vect-epilogues-nomask=0 -fdump-tree-vect-details -fdump-tree-widening_mul" }
> */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint16_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\.FMA" 1 "widening_mul" { xfail *-*-*
> } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..32a36461fbc7bb78048ae68c8dc0bdd81b11a2cd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_1.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int8_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT8_MAX, INT8_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..bd160dd0ebf515a3ff3ddd1969303aabf8c03aea
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_11.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint8_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT8_MAX, 7, 0, UINT8_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..8f0198ce42600b0fe92bf483123ad1cb71ff9f24
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_12.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint16_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT16_MAX, 7, 0, UINT16_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..218afde13984fc64755d3c4567a05a33b5485411
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_13.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint32_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT32_MAX, 7, 0, UINT32_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..1587628757e28f66dfd515e191ef04331c549434
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_14.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef uint64_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, 2, UINT64_MAX, 7, 0, UINT64_MAX, 5, 9 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..0f5918a9023521b06ac20ef922b025dc6a1e8f01
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_2.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int16_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT16_MAX, INT16_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..92548cb6ec4fdc4a3d133669fb914c5ab9a103ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_3.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int32_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT32_MAX, INT32_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..6085a18bab7f2ae0e5855a982e186f831705bf52
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_4.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +#include <limits.h>
> +#include <stdint.h>
> +
> +typedef int64_t elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1, -2, INT64_MAX, INT64_MIN, 5, -7, 3, -4 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..d61b91bb06dc0a035bd6adfabccc580eac7f78a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_5.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +typedef float elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..7598f7a28bcf1745ce672c0bab22fec0fda37a3f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_6.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fdump-tree-vect-details
> -fdump-tree-widening_mul" } */
> +
> +typedef double elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..e1c337d44ead96d868d71f0ae54960f2189e499e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_8.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv
> -fdump-tree-vect-details -fdump-tree-widening_mul" } */
> +
> +typedef float elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..13d962e2130f986910f1a94489e4014761e917b5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122749_9.c
> @@ -0,0 +1,45 @@
> +/* { dg-do run { target aarch64_sve_hw } } */
> +/* { dg-additional-options "-Ofast -std=gnu99 -fwrapv
> -fdump-tree-vect-details -fdump-tree-widening_mul" } */
> +
> +typedef double elem_t;
> +
> +__attribute__ ((noipa))
> +elem_t
> +foo2 (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +static elem_t
> +reference (elem_t *buf, int len)
> +{
> + elem_t x = 0;
> +
> +#pragma GCC novector
> + for (int i = 0; i < len; i++)
> + x += (elem_t) i * buf[i];
> +
> + return x;
> +}
> +
> +int
> +main (void)
> +{
> + elem_t buf[] = { 1.0, 2.0, 1.0, 2.0, 1.0, 2.0 };
> + int len = sizeof (buf) / sizeof (buf[0]);
> + elem_t want = reference (buf, len);
> + elem_t got = foo2 (buf, len);
> +
> + if (want != got)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "\\.COND_FMA" 1 "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index
> 4c3fb0f4fc5313199357d19ab809a7d8d88ed2d6..4b50a96ad3aa19857c5b8436ee8d6d3080d3c9ed
> 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -3120,6 +3120,26 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree
> op2)
> if (is_gimple_debug (use_stmt))
> continue;
>
> + /* If the use is a type convert, look further into it if the operations
> + are the same under two's complement. */
> + tree lhs_type;
> + if (gimple_assign_cast_p (use_stmt)
> + && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
> + && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
> + {
> + tree cast_lhs = gimple_get_lhs (use_stmt);
> + gimple *tmp_use;
> + use_operand_p tmp_use_p;
> + if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
> + {
> + release_defs (use_stmt);
> + use_stmt = tmp_use;
> + result = cast_lhs;
> + gsi_remove (&gsi, true);
> + gsi = gsi_for_stmt (use_stmt);
> + }
> + }
> +
> if (is_gimple_assign (use_stmt)
> && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
> {
> @@ -3159,6 +3179,13 @@ convert_mult_to_fma_1 (tree mul_result, tree op1, tree
> op2)
> if (seq)
> gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
>
> + /* Ensure all the operands are of the same type. Use the type of the
> + addend as that's the statement being replaced. */
> + op2 = gimple_convert (&gsi, true, GSI_SAME_STMT,
> + UNKNOWN_LOCATION, TREE_TYPE (addop), op2);
> + mulop1 = gimple_convert (&gsi, true, GSI_SAME_STMT,
> + UNKNOWN_LOCATION, TREE_TYPE (addop), mulop1);
> +
> if (len)
> fma_stmt
> = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
> @@ -3419,6 +3446,20 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree
> op2,
> if (is_gimple_debug (use_stmt))
> continue;
>
> + /* If the use is a type convert, look further into it if the operations
> + are the same under two's complement. */
> + tree lhs_type;
> + if (gimple_assign_cast_p (use_stmt)
> + && (lhs_type = TREE_TYPE (gimple_get_lhs (use_stmt)))
> + && tree_nop_conversion_p (lhs_type, TREE_TYPE (op1)))
> + {
> + tree cast_lhs = gimple_get_lhs (use_stmt);
> + gimple *tmp_use;
> + use_operand_p tmp_use_p;
> + if (single_imm_use (cast_lhs, &tmp_use_p, &tmp_use))
> + use_stmt = tmp_use;
> + }
> +
> /* For now restrict this operations to single basic blocks. In theory
> we would want to support sinking the multiplication in
> m = a*b;
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)