The fold-left reduction code has a (rarely-used) fallback that handles
cases in which the loop is fully-masked and the target has no native
support for the reduction.  The fallback includes a VEC_COND_EXPR
between the reduction vector and a safe value, so we should check
whether that VEC_COND_EXPR is supported.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2019-12-27  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * tree-vect-loop.c (vectorizable_reduction): Check whether the
        target supports the required VEC_COND_EXPR operation before
        allowing the fallback handling of masked fold-left reductions.

gcc/testsuite/
        * gcc.target/aarch64/sve/mixed_size_10.c: New test.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        2019-12-19 13:21:37.554633868 +0000
+++ gcc/tree-vect-loop.c        2019-12-27 16:01:39.344247474 +0000
@@ -6718,6 +6718,18 @@ vectorizable_reduction (stmt_vec_info st
                             " conditional operation is available.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
        }
+      else if (reduction_type == FOLD_LEFT_REDUCTION
+              && reduc_fn == IFN_LAST
+              && !expand_vec_cond_expr_p (vectype_in,
+                                          truth_type_for (vectype_in),
+                                          SSA_NAME))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't use a fully-masked loop because no"
+                            " conditional operation is available.\n");
+         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+       }
       else
        vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
                               vectype_in, NULL);
Index: gcc/testsuite/gcc.target/aarch64/sve/mixed_size_10.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mixed_size_10.c        2019-12-27 16:01:39.344247474 +0000
@@ -0,0 +1,13 @@
+/* { dg-options "-O3 -msve-vector-bits=256 -fno-tree-loop-distribution" } */
+
+float
+f (float *restrict x, double *restrict y)
+{
+  float res = 0.0;
+  for (int i = 0; i < 100; ++i)
+    {
+      res += x[i];
+      y[i] += y[i - 4] * 11;
+    }
+  return res;
+}

Reply via email to