On 17 July 2019 09:01:36 CEST, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>On the following testcase we end up with a comparison (EQ_EXPR in this
>case) with unsupported vector operands, but a supported result (vector
>boolean type with scalar mode, i.e. the AVX512F-ish integer bitmask) and
>later a VEC_COND_EXPR which is also not supported by the optab and has
>the vector boolean type with scalar mode as the first operand.
>
>The last hunk makes sure that we don't just ignore lowering of the
>comparison when it has an integer bitmask result but unsupported vector
>operands.  The expand_vector_comparison changes make sure we lower the
>comparison properly into the integer bitmask, and finally the
>expand_vector_condition changes make sure we properly lower the
>VEC_COND_EXPR.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
>2019-07-17  Jakub Jelinek  <ja...@redhat.com>
>
>       PR tree-optimization/91157
>       * tree-vect-generic.c (expand_vector_comparison): Handle lhs being
>       a vector boolean with scalar mode.
>       (expand_vector_condition): Handle first operand being a vector boolean
>       with scalar mode.
>       (expand_vector_operations_1): For comparisons, don't bail out early
>       if the return type is vector boolean with scalar mode, but comparison
>       operand type is not.
>
>       * gcc.target/i386/avx512f-pr91157.c: New test.
>       * gcc.target/i386/avx512bw-pr91157.c: New test.
>
>--- gcc/tree-vect-generic.c.jj 2019-07-04 00:18:37.063010439 +0200
>+++ gcc/tree-vect-generic.c    2019-07-16 12:40:41.343059690 +0200
>@@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it
>   tree t;
>   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
>       && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
>-    t = expand_vector_piecewise (gsi, do_compare, type,
>-                               TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
>+    {
>+      if (VECTOR_BOOLEAN_TYPE_P (type)
>+        && VECTOR_BOOLEAN_TYPE_P (type)

The above condition looks redundant, FWIW: it repeats the
VECTOR_BOOLEAN_TYPE_P (type) check from the line before it.
Did you mean to check op0 instead?

thanks,

>+        && SCALAR_INT_MODE_P (TYPE_MODE (type))
>+        && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
>+                     TYPE_VECTOR_SUBPARTS (type)
>+                     * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+                                              (TREE_TYPE (type)))))
>+      {
>+        tree inner_type = TREE_TYPE (TREE_TYPE (op0));
>+        tree part_width = TYPE_SIZE (inner_type);
>+        tree index = bitsize_int (0);
>+        int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
>+        int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
>+        tree ret_type = build_nonstandard_integer_type (prec, 1);
>+        tree ret_inner_type = boolean_type_node;
>+        int i;
>+        location_t loc = gimple_location (gsi_stmt (*gsi));
>+        t = build_zero_cst (ret_type);
>+
>+        if (TYPE_PRECISION (ret_inner_type) != 1)
>+          ret_inner_type = build_nonstandard_integer_type (1, 1);
>+        warning_at (loc, OPT_Wvector_operation_performance,
>+                    "vector operation will be expanded piecewise");
>+        for (i = 0; i < nunits;
>+             i++, index = int_const_binop (PLUS_EXPR, index, part_width))
>+          {
>+            tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
>+                                       index);
>+            tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
>+                                       index);
>+            tree result = gimplify_build2 (gsi, code, ret_inner_type, a,
>b);
>+            t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
>+                                 bitsize_int (i));
>+          }
>+        t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
>+      }
>+      else
>+      t = expand_vector_piecewise (gsi, do_compare, type,
>+                                   TREE_TYPE (TREE_TYPE (op0)), op0, op1,
>+                                   code);
>+    }
>   else
>     t = NULL_TREE;
> 
>@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite
>   tree a1 = a;
>   tree a2 = NULL_TREE;
>   bool a_is_comparison = false;
>+  bool a_is_scalar_bitmask = false;
>   tree b = gimple_assign_rhs2 (stmt);
>   tree c = gimple_assign_rhs3 (stmt);
>   vec<constructor_elt, va_gc> *v;
>@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite
>   warning_at (loc, OPT_Wvector_operation_performance,
>             "vector condition will be expanded piecewise");
> 
>+  if (!a_is_comparison
>+      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
>+      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
>+      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
>+                 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
>+                 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
>+                                              (TREE_TYPE (TREE_TYPE (a))))))
>+    {
>+      a_is_scalar_bitmask = true;
>+      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE
>(a)));
>+      tree atype = build_nonstandard_integer_type (prec, 1);
>+      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
>+    }
>+
>   int nunits = nunits_for_known_piecewise_op (type);
>   vec_alloc (v, nunits);
>   for (i = 0; i < nunits; i++)
>@@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite
>                                      comp_width, comp_index);
>         aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
>       }
>+      else if (a_is_scalar_bitmask)
>+      {
>+        wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE
>(a)));
>+        result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
>+                                  a, wide_int_to_tree (TREE_TYPE (a), w));
>+        aa = fold_build2 (NE_EXPR, boolean_type_node, result,
>+                          build_zero_cst (TREE_TYPE (a)));
>+      }
>       else
>       aa = tree_vec_extract (gsi, cond_type, a, width, index);
>     result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
>@@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_
>   /* A scalar operation pretending to be a vector one.  */
>   if (VECTOR_BOOLEAN_TYPE_P (type)
>       && !VECTOR_MODE_P (TYPE_MODE (type))
>-      && TYPE_MODE (type) != BLKmode)
>+      && TYPE_MODE (type) != BLKmode
>+      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) !=
>tcc_comparison
>+        || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
>+            && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
>+            && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
>     return;
> 
>   /* If the vector operation is operating on all same vector elements
>--- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj 2019-07-16
>12:54:55.928900526 +0200
>+++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c    2019-07-16
>13:01:39.217714434 +0200
>@@ -0,0 +1,29 @@
>+/* PR tree-optimization/91157 */
>+/* { dg-do run { target { avx512f && lp64 } } } */
>+/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions
>-fsignaling-nans" } */
>+
>+#include "avx512f-helper.h"
>+
>+typedef long double V __attribute__ ((vector_size (4 * sizeof (long
>double))));
>+typedef __int128 W __attribute__ ((vector_size (4 * sizeof
>(__int128))));
>+
>+__attribute__((noipa)) W
>+foo (V x)
>+{
>+  return x == 0;
>+}
>+
>+static void
>+test_512 (void)
>+{
>+  V a = { 5.0L, 0.0L, -0.0L, -17.0L };
>+  V b = { -0.0L, 16.0L, 0.0L, 18.0L };
>+  V c = { 6.0L, 7.0L, 8.0L, 0.0L };
>+  W ar = foo (a);
>+  W br = foo (b);
>+  W cr = foo (c);
>+  if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0
>+      || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0
>+      || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1)
>+    __builtin_abort ();
>+}
>--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj        2019-07-16
>12:55:11.609659992 +0200
>+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c   2019-07-16
>13:01:10.438155882 +0200
>@@ -0,0 +1,6 @@
>+/* PR tree-optimization/91157 */
>+/* { dg-do run { target { avx512bw && lp64 } } } */
>+/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions
>-fsignaling-nans" } */
>+
>+#define AVX512BW
>+#include "avx512f-pr91157.c"
>
>       Jakub

Reply via email to