Hi!

On the following testcase we end up with a comparison (an EQ_EXPR in this
case) whose vector operands are unsupported, but whose result is supported
(a vector boolean type with scalar mode, i.e. the AVX512F-style integer
bitmask), and later a VEC_COND_EXPR which is likewise not supported by the
optab and has that vector boolean type with scalar mode as its first
operand.
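
For reference, here is a scalar sketch of the mask representation involved
(illustrative only; the eq_bitmask helper name is invented, and the
4-element long double vector matches the testcase below).  Each element of
the comparison result occupies one bit of a small integer bitmask rather
than one vector lane:

typedef long double V __attribute__ ((vector_size (4 * sizeof (long double))));

static unsigned char
eq_bitmask (V x, V y)
{
  unsigned char mask = 0;
  for (int i = 0; i < 4; i++)
    if (x[i] == y[i])                  /* per-element comparison */
      mask |= (unsigned char) 1 << i;  /* element i -> bit i */
  return mask;
}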

The last hunk makes sure that we don't just skip lowering of the comparison
when it has an integer bitmask result but unsupported vector operands.
The expand_vector_comparison changes make sure we lower the comparison
properly into the integer bitmask, and finally the expand_vector_condition
changes make sure we lower the VEC_COND_EXPR properly.
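
In scalar terms the VEC_COND_EXPR lowering behaves roughly like the sketch
below (a hand-written illustration of the emitted semantics, not the
literal GIMPLE; cond_from_bitmask is an invented name).  The vector boolean
operand is VIEW_CONVERTed to an integer of the mask's precision and one bit
per element is tested with BIT_AND_EXPR/NE_EXPR.  Note that __int128, and
hence this sketch, needs a 64-bit target, which is why the testcases are
restricted to lp64:

typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128))));

static W
cond_from_bitmask (unsigned char mask, W b, W c)
{
  W r;
  for (int i = 0; i < 4; i++)
    /* BIT_AND_EXPR with 1 << i, then NE_EXPR against zero.  */
    r[i] = (mask & (1 << i)) != 0 ? b[i] : c[i];
  return r;
}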

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-07-17  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/91157
        * tree-vect-generic.c (expand_vector_comparison): Handle lhs being
        a vector boolean with scalar mode.
        (expand_vector_condition): Handle first operand being a vector boolean
        with scalar mode.
        (expand_vector_operations_1): For comparisons, don't bail out early
        if the return type is vector boolean with scalar mode, but comparison
        operand type is not.

        * gcc.target/i386/avx512f-pr91157.c: New test.
        * gcc.target/i386/avx512bw-pr91157.c: New test.

--- gcc/tree-vect-generic.c.jj  2019-07-04 00:18:37.063010439 +0200
+++ gcc/tree-vect-generic.c     2019-07-16 12:40:41.343059690 +0200
@@ -382,8 +382,47 @@ expand_vector_comparison (gimple_stmt_it
   tree t;
   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
-    t = expand_vector_piecewise (gsi, do_compare, type,
-                                TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
+    {
+      if (VECTOR_BOOLEAN_TYPE_P (type)
+         && SCALAR_INT_MODE_P (TYPE_MODE (type))
+         && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
+                      TYPE_VECTOR_SUBPARTS (type)
+                      * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+                                               (TREE_TYPE (type)))))
+       {
+         tree inner_type = TREE_TYPE (TREE_TYPE (op0));
+         tree part_width = TYPE_SIZE (inner_type);
+         tree index = bitsize_int (0);
+         int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
+         int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
+         tree ret_type = build_nonstandard_integer_type (prec, 1);
+         tree ret_inner_type = boolean_type_node;
+         int i;
+         location_t loc = gimple_location (gsi_stmt (*gsi));
+         t = build_zero_cst (ret_type);
+
+         if (TYPE_PRECISION (ret_inner_type) != 1)
+           ret_inner_type = build_nonstandard_integer_type (1, 1);
+         warning_at (loc, OPT_Wvector_operation_performance,
+                     "vector operation will be expanded piecewise");
+         for (i = 0; i < nunits;
+              i++, index = int_const_binop (PLUS_EXPR, index, part_width))
+           {
+             tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
+                                        index);
+             tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
+                                        index);
+             tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
+             t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
+                                  bitsize_int (i));
+           }
+         t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
+       }
+      else
+       t = expand_vector_piecewise (gsi, do_compare, type,
+                                    TREE_TYPE (TREE_TYPE (op0)), op0, op1,
+                                    code);
+    }
   else
     t = NULL_TREE;
 
@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
   tree a1 = a;
   tree a2 = NULL_TREE;
   bool a_is_comparison = false;
+  bool a_is_scalar_bitmask = false;
   tree b = gimple_assign_rhs2 (stmt);
   tree c = gimple_assign_rhs3 (stmt);
   vec<constructor_elt, va_gc> *v;
@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
   warning_at (loc, OPT_Wvector_operation_performance,
              "vector condition will be expanded piecewise");
 
+  if (!a_is_comparison
+      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
+      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
+      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
+                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
+                  * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
+                                               (TREE_TYPE (TREE_TYPE (a))))))
+    {
+      a_is_scalar_bitmask = true;
+      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
+      tree atype = build_nonstandard_integer_type (prec, 1);
+      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
+    }
+
   int nunits = nunits_for_known_piecewise_op (type);
   vec_alloc (v, nunits);
   for (i = 0; i < nunits; i++)
@@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite
                                       comp_width, comp_index);
          aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
        }
+      else if (a_is_scalar_bitmask)
+       {
+         wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
+         result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
+                                   a, wide_int_to_tree (TREE_TYPE (a), w));
+         aa = fold_build2 (NE_EXPR, boolean_type_node, result,
+                           build_zero_cst (TREE_TYPE (a)));
+       }
       else
        aa = tree_vec_extract (gsi, cond_type, a, width, index);
       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
@@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_
   /* A scalar operation pretending to be a vector one.  */
   if (VECTOR_BOOLEAN_TYPE_P (type)
       && !VECTOR_MODE_P (TYPE_MODE (type))
-      && TYPE_MODE (type) != BLKmode)
+      && TYPE_MODE (type) != BLKmode
+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
+         || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
+             && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
+             && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
     return;
 
   /* If the vector operation is operating on all same vector elements
--- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj  2019-07-16 12:54:55.928900526 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c     2019-07-16 13:01:39.217714434 +0200
@@ -0,0 +1,29 @@
+/* PR tree-optimization/91157 */
+/* { dg-do run { target { avx512f && lp64 } } } */
+/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
+
+#include "avx512f-helper.h"
+
+typedef long double V __attribute__ ((vector_size (4 * sizeof (long double))));
+typedef __int128 W __attribute__ ((vector_size (4 * sizeof (__int128))));
+
+__attribute__((noipa)) W
+foo (V x)
+{
+  return x == 0;
+}
+
+static void
+test_512 (void)
+{
+  V a = { 5.0L, 0.0L, -0.0L, -17.0L };
+  V b = { -0.0L, 16.0L, 0.0L, 18.0L };
+  V c = { 6.0L, 7.0L, 8.0L, 0.0L };
+  W ar = foo (a);
+  W br = foo (b);
+  W cr = foo (c);
+  if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0
+      || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0
+      || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1)
+    __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj 2019-07-16 12:55:11.609659992 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c    2019-07-16 13:01:10.438155882 +0200
@@ -0,0 +1,6 @@
+/* PR tree-optimization/91157 */
+/* { dg-do run { target { avx512bw && lp64 } } } */
+/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions -fsignaling-nans" } */
+
+#define AVX512BW
+#include "avx512f-pr91157.c"

        Jakub
