On Fri, 19 Nov 2021, Tamar Christina wrote: > Hi All, > > Apologies, I got dinged by the i386 regressions bot for a test I didn't have > in > my tree at the time I made the previous patch. The bot was telling me that > FMA > stopped working after I strengthened the FMA check in the previous patch. > > The reason is that the check is slightly early. The first check can indeed > only > exit early when either node isn't a mult. However we need to delay till we > know > if the node is a MUL or FMA before enforcing that both nodes must be a MULT > since the node to inspect is different if the operation is a MUL or FMA. > > Also with the update patch for GCC 11 tree layout update to the new GCC 12 one > I had missed that the difference in which node is conjugated is not > symmetrical. > > So the test for it can just be testing the inverse order. It was currently > not detecting when the first node was conjugated instead of the second one. > > This also made me wonder why my own test didn't detect this. It turns out > that > the tests, being copied from the _Float16 ones were incorrectly marked as > xfail. The _Float16 ones are marked as xfail since C doesn't have a conj > operation for _Float16, which means you get extra type-casts in between. > > While you could use the GCC _Complex extension here I opted to mark them xfail > since I wanted to include detection over the widenings next year. > > Secondly the double tests were being skipped because Adv. SIMD was missing > from > targets supporting Complex Double vectorization. > > With these changes all other tests run and pass and only XFAIL ones are > correctly the _Float16 ones. Sorry for missing this before, testing should > now > cover all cases. > > Bootstrapped and regtested on aarch64-none-linux-gnu, > x86_64-pc-linux-gnu and no regressions. > > Ok for master?
OK. Thanks, Richard. > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/103311 > * tree-vect-slp-patterns.c (vect_validate_multiplication): Fix CONJ > test to new codegen. > (complex_mul_pattern::matches): Move check downwards. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/103311 > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c: Fix it. > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c: Likewise. > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c: Likewise. > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c: Likewise. > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c: Likewise. > * gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c: Likewise. > * lib/target-supports.exp > (check_effective_target_vect_complex_add_double): Add Adv. SIMD. > > --- inline copy of patch -- > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c > index > 462063abc3041d466e8b0261252054a46ef48e15..e13fa3bd31bab016c7a033a7ed590047c757882b > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c > +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-double.c > @@ -6,5 +6,5 @@ > #define N 16 > #include "complex-mla-template.c" > > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "slp1" } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "vect" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "vect" } } */ > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c > index > a88adc8184ece3d6d61c66a42233fde0f1721a78..2d774315df96a37759021ab64e0bdb4eb6292f6e > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c > +++ 
b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mla-float.c > @@ -6,5 +6,5 @@ > #define TYPE float > #define N 16 > #include "complex-mla-template.c" > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "slp1" { xfail *-*-* > } } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" { xfail *-*-* } } > } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "vect" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "vect" } } */ > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c > index > a434fd1f1d3235392a0240b5861c85d39c48d551..bfb62f21f972c5a02af4952d2db1799f37011cad > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c > +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-double.c > @@ -6,7 +6,5 @@ > #define N 16 > #include "complex-mls-template.c" > > -/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMS_CONJ" "slp1" } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMS" "slp1" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMS_CONJ" "vect" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMS" "vect" } } */ > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c > index > b7ccbbdb75743182022465243efcb128ef97fd0c..930bd11d7fc33ab70ee0dc2a716f12e37ea87290 > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c > +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mls-float.c > @@ -7,5 +7,5 @@ > #define N 16 > #include "complex-mls-template.c" > > -/* { dg-final { scan-tree-dump "Found COMPLEX_FMS_CONJ" "slp1" { xfail > *-*-* } } } */ > -/* { 
dg-final { scan-tree-dump "Found COMPLEX_FMS" "slp1" { xfail *-*-* } } > } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMS_CONJ" "vect" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_FMS" "vect" } } */ > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c > index > f7e9386334e1b259d43d35f4bc6b79010d020e6a..86904ea72667a2534a5a8a2447563f2230d3c7db > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c > +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-double.c > @@ -6,5 +6,5 @@ > #define N 16 > #include "complex-mul-template.c" > > -/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" slp1" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "vect" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "vect" } } */ > diff --git > a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c > b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c > index > 0dc9c52555640c34f794e0f57c04f3ea0510fdc9..27280ae2ba4dfaee1a547d43f75240a7634caa9b > 100644 > --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c > +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-mul-float.c > @@ -7,5 +7,5 @@ > #define N 16 > #include "complex-mul-template.c" > > -/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" { xfail > *-*-* } } } */ > -/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1" { xfail *-*-* } } > } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" } } */ > +/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1" } } */ > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index > e07d563f6f4e39aa6264ecf9074db7be75dae009..d22270659ddcaf8fbb456e409328c8473fb43d9a > 
100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -3632,8 +3632,10 @@ proc check_effective_target_vect_complex_add_float { } > { > proc check_effective_target_vect_complex_add_double { } { > return [check_cached_effective_target_indexed vect_complex_add_double { > expr { > - ([check_effective_target_aarch64_sve2] > - && [check_effective_target_aarch64_little_endian]) > + (([check_effective_target_arm_v8_3a_complex_neon_ok] > + && [check_effective_target_aarch64_little_endian]) > + || ([check_effective_target_aarch64_sve2] > + && [check_effective_target_aarch64_little_endian])) > }}] > } > > diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c > index > d916fc9cef9a86a0d797caaf2b2f253685e9b7dd..0ee1fab2ed527183a4f0af375d1d2b609b2e092f > 100644 > --- a/gcc/tree-vect-slp-patterns.c > +++ b/gcc/tree-vect-slp-patterns.c > @@ -809,14 +809,20 @@ vect_validate_multiplication > (slp_tree_to_load_perm_map_t *perm_cache, > if (linear_loads_p (perm_cache, left_op[index2]) == PERM_EVENODD) > return true; > } > - else if (kind == PERM_EVENODD) > + else if (kind == PERM_EVENODD && !neg_first) > { > - if ((kind = linear_loads_p (perm_cache, left_op[index2])) == > PERM_EVENODD) > + if ((kind = linear_loads_p (perm_cache, left_op[index2])) != > PERM_EVENEVEN) > return false; > return true; > } > - else if (!neg_first) > - *conj_first_operand = true; > + else if (kind == PERM_EVENEVEN && neg_first) > + { > + if ((kind = linear_loads_p (perm_cache, left_op[index2])) != > PERM_EVENODD) > + return false; > + > + *conj_first_operand = true; > + return true; > + } > else > return false; > > @@ -949,7 +955,7 @@ complex_mul_pattern::matches (complex_operation_t op, > > bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR); > bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR); > - if (!mul0 || !mul1) > + if (!mul0 && !mul1) > return IFN_LAST; > > /* Now operand2+4 may lead to another expression. 
*/ > @@ -979,6 +985,8 @@ complex_mul_pattern::matches (complex_operation_t op, > > if (left_op.length () != 2 > || right_op.length () != 2 > + || !mul1 > + || (!mul0 && !vect_match_expression_p (left_op[1], MULT_EXPR)) > || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN) > return IFN_LAST; > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)