On Thu, 25 Feb 2021, Tamar Christina wrote:

> Hi All,
> 
> The given testcase shows that one of the children of the complex MUL contains 
> a
> PHI node.  This results in the vectorizer having a child that's (nil).
> 
> The pattern matcher handles this correctly, but optimize_load_redistribution_1
> needs to not traverse/inspect the NULL nodes.
> 
> This however does high-light a missed opportunity.  This testcase seems to
> result in a different canonicalization than normally.
> 
> Normally the expressions are right leaning.  But sometimes, especially when 
> type
> casts are introduced the trees suddenly become left leaning. For instance this
> testcase (even without type casts) won't detect the FMA form because the 
> addition
> gets the MUL node in the left and not right node as it expects.

There's not really any canonicalization rule that would enforce either 
order.  The reassoc pass might in practice produce such ordering but
you can't rely on it.

> Checking all forms would be quite expensive so for GCC 12 it probably makes 
> sense to make
> forms with type casts in them have the same form as those without?

Is it really expensive?

> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Committed under the GCC obvious rule.

Thanks.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * tree-vect-slp.c (optimize_load_redistribution_1): Abort on NULL nodes.
> 
> gcc/testsuite/ChangeLog:
> 
>       * g++.dg/vect/complex-num-null-node.cc: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc 
> b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..eddf0ad8710138c3b5ac31706df02109d8164329
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/vect/simd-complex-num-null-node.cc
> @@ -0,0 +1,78 @@
> +/* { dg-do compile { target { aarch64-*-* } } } */
> +/* { dg-skip-if "incorrect syntax for c++98" { *-*-* } { "-std=c++98" } { "" 
> } } */
> +/* { dg-additional-options "-w -O3 -march=armv8.3-a -fdump-tree-vect-all" } 
> */
> +/* { dg-require-effective-target le } */
> +
> +typedef struct {
> +  float b;
> +  float c;
> +} d;
> +namespace {
> +typedef int e;
> +template <typename, typename> struct f;
> +template <template <typename> class g, typename h, typename k, typename... l>
> +struct f<g<k, l...>, h> {
> +  using m = g<h>;
> +};
> +} // namespace
> +namespace aa {
> +template <typename k> class o {
> +public:
> +  typedef k p;
> +};
> +} // namespace aa
> +namespace ab {
> +template <typename k> using r = aa::o<k>;
> +template <typename k> class ac : public r<k> {};
> +struct s {
> +  template <typename k, typename h> struct ad : f<k, h> {};
> +};
> +template <typename t, typename h> using ae = typename s::ad<t, h>::m;
> +template <typename t> struct af {
> +  typedef typename t::p p;
> +  template <typename k> using u = ae<t, k>;
> +};
> +} // namespace ab
> +namespace aa {
> +template <typename t> struct ag {
> +  typedef ab::af<t> v;
> +  typedef typename v::p &ah;
> +  template <typename k> struct ai { typedef typename v::u<k> aj; };
> +};
> +} // namespace aa
> +namespace ab {
> +template <typename k, typename t> struct w {
> +  typedef typename aa::ag<t>::ai<k>::aj x;
> +  struct y {};
> +  typedef t ak;
> +  w(e, ak);
> +  y a;
> +};
> +template <typename k, typename t = ac<k>> class al : w<k, t> {
> +  typedef w<k, t> am;
> +  typedef typename am::x x;
> +  typedef aa::ag<x> an;
> +
> +public:
> +  typedef typename an::ah ah;
> +  typedef e ao;
> +  typedef t ak;
> +  al(ao ap, ak aq = ak()) : am(ar(ap, aq), aq) {}
> +  ah operator[](ao);
> +  ao ar(ao ap, ak) { return ap; }
> +};
> +} // namespace ab
> +void as(int n, d *a, d *q) {
> +  ab::al<d> z(n);
> +  d acc;
> +  for (int j = 0; j < n; ++j) {
> +    auto at = a[j];
> +    auto au = q[j];
> +    acc.b += at.b * au.b - at.c * au.c;
> +    acc.c += at.b * au.c + at.c * au.b;
> +  }
> +  z[0] = acc;
> +}
> +
> +
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_MUL" 1 "vect" } } */
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index 
> 09a0f42410e3542c7823d70757550c22b8ef676f..aed0c7798b6203de6bce7a4764aeedd9a554670d
>  100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -2300,7 +2300,7 @@ optimize_load_redistribution_1 
> (scalar_stmts_to_slp_tree_map_t *bst_map,
>    unsigned i;
>  
>    /* For now, we don't know anything about externals so do not do anything.  
> */
> -  if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
> +  if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def)
>      return NULL;
>    else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR)
>      {
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to