On Fri, 25 Oct 2019 at 14:18, Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> Hi Prathamesh,
>
> I've just committed a patch that fixes a large number of SVE
> reduction-related failures.  Could you rebase and retest on top of that?
> Sorry for messing you around, but regression testing based on the state
> before the patch wouldn't have been that meaningful.  In particular...
>
> Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> > index a70d52eb2ca..82814e2c2af 100644
> > --- a/gcc/tree-vect-loop.c
> > +++ b/gcc/tree-vect-loop.c
> > @@ -6428,6 +6428,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
> >    if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
> >      {
> >        if (reduction_type != FOLD_LEFT_REDUCTION
> > +       && reduction_type != EXTRACT_LAST_REDUCTION
> >         && !mask_by_cond_expr
> >         && (cond_fn == IFN_LAST
> >             || !direct_internal_fn_supported_p (cond_fn, vectype_in,
>
> ...after today's patch, it's instead necessary to remove:
>
>       if (loop_vinfo
>           && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
>           && reduction_type == EXTRACT_LAST_REDUCTION)
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                              "can't yet use a fully-masked loop for"
>                              " EXTRACT_LAST_REDUCTION.\n");
>           LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
>         }
>
> from vectorizable_condition.  We no longer need any changes to
> vectorizable_reduction itself.
>
> > @@ -10180,18 +10181,29 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> >            vec != { 0, ... } (masked in the MASK_LOAD,
> >            unmasked in the VEC_COND_EXPR).  */
> >
> > -       if (loop_mask)
> > +       if (masks)
> >           {
> > -           if (COMPARISON_CLASS_P (vec_compare))
> > +           unsigned vec_num = vec_oprnds0.length ();
> > +           loop_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
> > +                                           vectype, vec_num * j + i);
>
> Ah... now that the two cases are merged (good!), just "if (masks)" isn't
> right after all, sorry for the misleading comment.  I think this should
> instead be:
>
>           /* Force vec_compare to be an SSA_NAME rather than a comparison,
>              in cases where that's necessary.  */
>           if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
>             {
>
> Not doing that would break unmasked EXTRACT_LAST_REDUCTIONs.
Ah right, thanks for pointing that out!
>
> Then make the existing:
>
>               tree tmp2 = make_ssa_name (vec_cmp_type);
>               gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
>                                                 vec_compare, loop_mask);
>               vect_finish_stmt_generation (stmt_info, g, gsi);
>               vec_compare = tmp2;
>
> conditional on "if (masks)" only, and defer the calculation of loop_mask
> to this point too.
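>
> I.e. the tail of that block would then end up looking something like
> this (just a sketch of the intended shape, matching the masked
> BIT_AND snippet above, not tested):
>
>               if (masks)
>                 {
>                   unsigned vec_num = vec_oprnds0.length ();
>                   tree loop_mask
>                     = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
>                                           vectype, vec_num * j + i);
>                   tree tmp2 = make_ssa_name (vec_cmp_type);
>                   gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
>                                                     vec_compare, loop_mask);
>                   vect_finish_stmt_generation (stmt_info, g, gsi);
>                   vec_compare = tmp2;
>                 }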
>
> [ It would be good to spot-check that aarch64-sve.exp passes after making
>   the changes to the stmt-generation part of vectorizable_condition,
>   but before removing the:
>
>             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
>
>   quoted above.  That would show that unmasked EXTRACT_LAST reductions
>   still work after the changes.
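>
>   Running just that harness should be quick; something like
>   "make check-gcc RUNTESTFLAGS=aarch64-sve.exp" from the gcc build
>   directory ought to do it, assuming a normal build tree.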
>
>   There are still some lingering cases in which we can test unmasked
>   SVE loops directly, but they're becoming rarer and should eventually
>   go away altogether.  So I don't think it's worth trying to construct
>   an unmasked test for the testsuite. ]
>
> > +
> > +              if (!is_gimple_val (vec_compare))
> > +                {
> > +                  tree vec_compare_name = make_ssa_name (vec_cmp_type);
> > +                  gassign *new_stmt = gimple_build_assign (vec_compare_name,
> > +                                                           vec_compare);
> > +                  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
> > +                  vec_compare = vec_compare_name;
> > +                }
>
> Should use tab-based indentation.
Thanks for the suggestions; does the attached version look OK?
Comparing aarch64-sve.exp results before and after the patch shows no
regressions; bootstrap and regression testing are in progress.
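
For reference, the kind of loop this affects is a simple condition
reduction along the lines of clastb_1.c (a minimal sketch; the exact
initial value is incidental):

int
condition_reduction (int *a, int min_v)
{
  int last = 66;

  for (int i = 0; i < 32; i++)
    if (a[i] < min_v)
      last = a[i];

  return last;
}

With the patch, the compare feeding IFN_FOLD_EXTRACT_LAST is ANDed
with the loop mask, so the loop can be fully masked and the clastb
operates on an SVE register rather than a GP register, hence the
updated scan-assembler patterns in the testsuite changes below.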

Thanks,
Prathamesh
>
> Thanks,
> Richard
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
index d4f9b0b6a94..d3ea52dea47 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
index 2c49bd3b0f0..c222b707912 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #include <stdint.h>
 
@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
index 35344f446c6..5aaa71f948d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint8_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\tb[0-9]+, p[0-7], b[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
index ce58abd6161..b4db170ea06 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE int16_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
index 2b9783d6627..28d40a01f93 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint64_t
 
 #include "clastb_2.c"
 
-/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
+/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
index c47d303f730..38632a21be1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
index 3345f874a39..e5307d2edc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
@@ -1,7 +1,8 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE double
 #include "clastb_6.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
index d86a428a7fa..583fc8d8d6d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
 
 #include <stdint.h>
 
@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
 TEST_TYPE (uint32_t);
 TEST_TYPE (uint64_t);
 
+/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 19ac82fe4e3..588145ebdce 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10050,16 +10050,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		return false;
 	    }
 	}
-      if (loop_vinfo
-	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
-	  && reduction_type == EXTRACT_LAST_REDUCTION)
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "can't yet use a fully-masked loop for"
-			     " EXTRACT_LAST_REDUCTION.\n");
-	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-	}
       if (expand_vec_cond_expr_p (vectype, comp_vectype,
 				     cond_code))
 	{
@@ -10089,31 +10079,31 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   /* Handle cond expr.  */
   for (j = 0; j < ncopies; j++)
     {
-      tree loop_mask = NULL_TREE;
       bool swap_cond_operands = false;
 
       /* See whether another part of the vectorized code applies a loop
 	 mask to the condition, or to its inverse.  */
 
+      vec_loop_masks *masks = NULL;
       if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
 	{
-	  scalar_cond_masked_key cond (cond_expr, ncopies);
-	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-	    {
-	      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-	      loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
-	    }
+	  if (reduction_type == EXTRACT_LAST_REDUCTION)
+	    masks = &LOOP_VINFO_MASKS (loop_vinfo);
 	  else
 	    {
-	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
-	      cond.code = invert_tree_comparison (cond.code, honor_nans);
+	      scalar_cond_masked_key cond (cond_expr, ncopies);
 	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+		masks = &LOOP_VINFO_MASKS (loop_vinfo);
+	      else
 		{
-		  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-		  loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
-						  vectype, j);
-		  cond_code = cond.code;
-		  swap_cond_operands = true;
+		  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+		  cond.code = invert_tree_comparison (cond.code, honor_nans);
+		  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
+		    {
+		      masks = &LOOP_VINFO_MASKS (loop_vinfo);
+		      cond_code = cond.code;
+		      swap_cond_operands = true;
+		    }
 		}
 	    }
 	}
@@ -10248,28 +10238,10 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	     vec != { 0, ... } (masked in the MASK_LOAD,
 	     unmasked in the VEC_COND_EXPR).  */
 
-	  if (loop_mask)
-	    {
-	      if (COMPARISON_CLASS_P (vec_compare))
-		{
-		  tree tmp = make_ssa_name (vec_cmp_type);
-		  tree op0 = TREE_OPERAND (vec_compare, 0);
-		  tree op1 = TREE_OPERAND (vec_compare, 1);
-		  gassign *g = gimple_build_assign (tmp,
-						    TREE_CODE (vec_compare),
-						    op0, op1);
-		  vect_finish_stmt_generation (stmt_info, g, gsi);
-		  vec_compare = tmp;
-		}
-
-	      tree tmp2 = make_ssa_name (vec_cmp_type);
-	      gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
-						vec_compare, loop_mask);
-	      vect_finish_stmt_generation (stmt_info, g, gsi);
-	      vec_compare = tmp2;
-	    }
 
+	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
+	     in cases where that's necessary.  */
-	  if (reduction_type == EXTRACT_LAST_REDUCTION)
+	  if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
 	    {
 	      if (!is_gimple_val (vec_compare))
 		{
@@ -10279,6 +10251,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 		  vec_compare = vec_compare_name;
 		}
+
 	      if (must_invert_cmp_result)
 		{
 		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
@@ -10288,6 +10261,23 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 		  vec_compare = vec_compare_name;
 		}
+
+	      if (masks)
+		{
+		  unsigned vec_num = vec_oprnds0.length ();
+		  tree loop_mask
+		    = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+					  vectype, vec_num * j + i);
+		  tree tmp2 = make_ssa_name (vec_cmp_type);
+		  gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
+						    vec_compare, loop_mask);
+		  vect_finish_stmt_generation (stmt_info, g, gsi);
+		  vec_compare = tmp2;
+		}
+	    }
+
+	  if (reduction_type == EXTRACT_LAST_REDUCTION)
+	    {
 	      gcall *new_stmt = gimple_build_call_internal
 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
 		 vec_then_clause);
