Tamar Christina <tamar.christ...@arm.com> writes:
> Hi,
>
> Here's a respin of the patch, which I think implements the approach you
> expected.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       * tree-vect-stmts.c (vectorizable_condition): Check if inverse of mask
>       is live.
>       * tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
>       Register mask inverses.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/pred-not-gen-1.c: Update testcase.
>       * gcc.target/aarch64/sve/pred-not-gen-2.c: Update testcase.
>       * gcc.target/aarch64/sve/pred-not-gen-3.c: Update testcase.
>       * gcc.target/aarch64/sve/pred-not-gen-4.c: Update testcase.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> index 
> 2c06564186c5a5e7917da475a9c201c81dfeb136..7fac35ea9387818aaa0a12ef66d02313013203af
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */

Why do we need this change from "dg-do assemble" to "dg-do compile"?
The --save-temps should ensure that we still run the scan tests.

Very minor nit, but could you tabify the rest of operator== (in the
tree-vectorizer.h hunk)?  The new line is correctly formatted but the
existing ones aren't, which stands out in the diff.

The patch is OK without the dg-do changes to the tests, if that works.

Thanks,
Richard

>  
>  /*
> @@ -21,3 +21,4 @@ void f10(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, #0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> index 
> 0c3b78d4c67455c971e94fb2ffdd7be2d4884864..d73f7cbeb6be7ad2cc54e601d6c4fbd4d98fa675
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  /*
> @@ -21,3 +21,4 @@ void f11(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, #0.0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> index 
> 248f8ab57191ce8a1d4c334533de8bc76aa07691..1240d7cb86d00221b546d81f128d64d22d347885
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  /*
> @@ -19,3 +19,4 @@ void f12(int * restrict z, int * restrict w, int * restrict 
> x, int * restrict y,
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} } } */
> +/* { dg-final { scan-assembler-times {\tcmple\tp[0-9]+\.s, p[0-9]+/z, 
> z[0-9]+\.s, #0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> index 
> 96200309880a91ad1db5801115c911cfdce06125..edda9c115900ca62268425f1616d975f6a7b7721
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  #include <math.h>
> @@ -12,3 +12,4 @@ void f13(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, z[0-9]+\.d} 1 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> 27ee48aea429810a37777d907435a92b8fd1817d..23f7bed26626a872c165cd2654bb4391a847bd7e
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -10375,6 +10375,7 @@ vectorizable_condition (vec_info *vinfo,
>         else
>           {
>             bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
> +           tree_code orig_code = cond.code;
>             cond.code = invert_tree_comparison (cond.code, honor_nans);
>             if (loop_vinfo->scalar_cond_masked_set.contains (cond))
>               {
> @@ -10382,6 +10383,22 @@ vectorizable_condition (vec_info *vinfo,
>                 cond_code = cond.code;
>                 swap_cond_operands = true;
>               }
> +           else
> +             {
> +               /* Try the inverse of the current mask.  We check if the
> +                  inverse mask is live and if so we generate a negate of
> +                  the current mask such that we still honor NaNs.  */
> +               cond.inverted_p = true;
> +               cond.code = orig_code;
> +               if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> +                 {
> +                   bitop1 = orig_code;
> +                   bitop2 = BIT_NOT_EXPR;
> +                   masks = &LOOP_VINFO_MASKS (loop_vinfo);
> +                   cond_code = cond.code;
> +                   swap_cond_operands = true;
> +                 }
> +             }
>           }
>       }
>      }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 
> 866d813a12c5df42b53e5f0770fdc76e9e8b471e..aa0ab4443c5905ad79c5d722f97b3c598b94b669
>  100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -266,6 +266,7 @@ struct scalar_cond_masked_key
>    void get_cond_ops_from_tree (tree);
>  
>    unsigned ncopies;
> +  bool inverted_p;
>    tree_code code;
>    tree op0;
>    tree op1;
> @@ -285,6 +286,7 @@ struct default_hash_traits<scalar_cond_masked_key>
>      inchash::add_expr (v.op0, h, 0);
>      inchash::add_expr (v.op1, h, 0);
>      h.add_int (v.ncopies);
> +    h.add_flag (v.inverted_p);
>      return h.end ();
>    }
>  
> @@ -293,6 +295,7 @@ struct default_hash_traits<scalar_cond_masked_key>
>    {
>      return (existing.ncopies == candidate.ncopies
>             && existing.code == candidate.code
> +        && existing.inverted_p == candidate.inverted_p
>             && operand_equal_p (existing.op0, candidate.op0, 0)
>             && operand_equal_p (existing.op1, candidate.op1, 0));
>    }
> @@ -303,6 +306,7 @@ struct default_hash_traits<scalar_cond_masked_key>
>    mark_empty (value_type &v)
>    {
>      v.ncopies = 0;
> +    v.inverted_p = false;
>    }
>  
>    static inline bool
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 
> 1e370d60fb19b03c3b6bce45c660af4b6d32dc51..6c58acbb12431d8a92d6de478f16f51cd0261086
>  100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -1696,6 +1696,7 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
>        this->code = TREE_CODE (t);
>        this->op0 = TREE_OPERAND (t, 0);
>        this->op1 = TREE_OPERAND (t, 1);
> +      this->inverted_p = false;
>        return;
>      }
>  
> @@ -1708,11 +1709,29 @@ scalar_cond_masked_key::get_cond_ops_from_tree (tree 
> t)
>           this->code = code;
>           this->op0 = gimple_assign_rhs1 (stmt);
>           this->op1 = gimple_assign_rhs2 (stmt);
> +         this->inverted_p = false;
>           return;
>         }
> +     else if (code == BIT_NOT_EXPR)
> +       {
> +         tree n_op = gimple_assign_rhs1 (stmt);
> +         if ((stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (n_op))))
> +           {
> +             code = gimple_assign_rhs_code (stmt);
> +             if (TREE_CODE_CLASS (code) == tcc_comparison)
> +               {
> +                 this->code = code;
> +                 this->op0 = gimple_assign_rhs1 (stmt);
> +                 this->op1 = gimple_assign_rhs2 (stmt);
> +                 this->inverted_p = true;
> +                 return;
> +               }
> +           }
> +       }
>        }
>  
>    this->code = NE_EXPR;
>    this->op0 = t;
>    this->op1 = build_zero_cst (TREE_TYPE (t));
> +  this->inverted_p = false;
>  }

Reply via email to