On 17 July 2019 09:01:36 CEST, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >On the following testcase we end up with a comparison (EQ_EXPR in this >case) >with unsupported vector operands, but supported result (vector boolean >type with scalar mode, i.e. the AVX512F-ish integer bitmask) and later >a VEC_COND_EXPR which is also not supported by the optab and has the >vector >boolean type with scalar mode as the first operand. > >The last hunk makes sure that we don't just ignore lowering of the >comparison >when it has an integer bitmask result but unsupported vector operands. >The expand_vector_comparison changes makes sure we lower the comparison >properly into the integer bitmask and finally the >expand_vector_condition >changes makes sure we lower properly the VEC_COND_EXPR. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > >2019-07-17 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/91157 > * tree-vect-generic.c (expand_vector_comparison): Handle lhs being > a vector boolean with scalar mode. > (expand_vector_condition): Handle first operand being a vector boolean > with scalar mode. > (expand_vector_operations_1): For comparisons, don't bail out early > if the return type is vector boolean with scalar mode, but comparison > operand type is not. > > * gcc.target/i386/avx512f-pr91157.c: New test. > * gcc.target/i386/avx512bw-pr91157.c: New test. > >--- gcc/tree-vect-generic.c.jj 2019-07-04 00:18:37.063010439 +0200 >+++ gcc/tree-vect-generic.c 2019-07-16 12:40:41.343059690 +0200 >@@ -382,8 +382,48 @@ expand_vector_comparison (gimple_stmt_it > tree t; > if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) > && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) >- t = expand_vector_piecewise (gsi, do_compare, type, >- TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); >+ { >+ if (VECTOR_BOOLEAN_TYPE_P (type) >+ && VECTOR_BOOLEAN_TYPE_P (type)
The above condition looks redundant, FWIW: VECTOR_BOOLEAN_TYPE_P (type) is tested twice in a row. Did you mean for one of the two tests to check op0 instead? Thanks, >+ && SCALAR_INT_MODE_P (TYPE_MODE (type)) >+ && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)), >+ TYPE_VECTOR_SUBPARTS (type) >+ * GET_MODE_BITSIZE (SCALAR_TYPE_MODE >+ (TREE_TYPE (type))))) >+ { >+ tree inner_type = TREE_TYPE (TREE_TYPE (op0)); >+ tree part_width = TYPE_SIZE (inner_type); >+ tree index = bitsize_int (0); >+ int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0)); >+ int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type)); >+ tree ret_type = build_nonstandard_integer_type (prec, 1); >+ tree ret_inner_type = boolean_type_node; >+ int i; >+ location_t loc = gimple_location (gsi_stmt (*gsi)); >+ t = build_zero_cst (ret_type); >+ >+ if (TYPE_PRECISION (ret_inner_type) != 1) >+ ret_inner_type = build_nonstandard_integer_type (1, 1); >+ warning_at (loc, OPT_Wvector_operation_performance, >+ "vector operation will be expanded piecewise"); >+ for (i = 0; i < nunits; >+ i++, index = int_const_binop (PLUS_EXPR, index, part_width)) >+ { >+ tree a = tree_vec_extract (gsi, inner_type, op0, part_width, >+ index); >+ tree b = tree_vec_extract (gsi, inner_type, op1, part_width, >+ index); >+ tree result = gimplify_build2 (gsi, code, ret_inner_type, a, >b); >+ t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result, >+ bitsize_int (i)); >+ } >+ t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); >+ } >+ else >+ t = expand_vector_piecewise (gsi, do_compare, type, >+ TREE_TYPE (TREE_TYPE (op0)), op0, op1, >+ code); >+ } > else > t = NULL_TREE; > >@@ -879,6 +919,7 @@ expand_vector_condition (gimple_stmt_ite > tree a1 = a; > tree a2 = NULL_TREE; > bool a_is_comparison = false; >+ bool a_is_scalar_bitmask = false; > tree b = gimple_assign_rhs2 (stmt); > tree c = gimple_assign_rhs3 (stmt); > vec<constructor_elt, va_gc> *v; >@@ -942,6 +983,20 @@ expand_vector_condition (gimple_stmt_ite > warning_at (loc, OPT_Wvector_operation_performance, > "vector condition will be expanded 
piecewise"); > >+ if (!a_is_comparison >+ && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)) >+ && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a))) >+ && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))), >+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (a)) >+ * GET_MODE_BITSIZE (SCALAR_TYPE_MODE >+ (TREE_TYPE (TREE_TYPE (a)))))) >+ { >+ a_is_scalar_bitmask = true; >+ int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE >(a))); >+ tree atype = build_nonstandard_integer_type (prec, 1); >+ a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a); >+ } >+ > int nunits = nunits_for_known_piecewise_op (type); > vec_alloc (v, nunits); > for (i = 0; i < nunits; i++) >@@ -957,6 +1012,14 @@ expand_vector_condition (gimple_stmt_ite > comp_width, comp_index); > aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); > } >+ else if (a_is_scalar_bitmask) >+ { >+ wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE >(a))); >+ result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a), >+ a, wide_int_to_tree (TREE_TYPE (a), w)); >+ aa = fold_build2 (NE_EXPR, boolean_type_node, result, >+ build_zero_cst (TREE_TYPE (a))); >+ } > else > aa = tree_vec_extract (gsi, cond_type, a, width, index); > result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); >@@ -1941,7 +2004,11 @@ expand_vector_operations_1 (gimple_stmt_ > /* A scalar operation pretending to be a vector one. 
*/ > if (VECTOR_BOOLEAN_TYPE_P (type) > && !VECTOR_MODE_P (TYPE_MODE (type)) >- && TYPE_MODE (type) != BLKmode) >+ && TYPE_MODE (type) != BLKmode >+ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != >tcc_comparison >+ || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)) >+ && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1))) >+ && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode))) > return; > > /* If the vector operation is operating on all same vector elements >--- gcc/testsuite/gcc.target/i386/avx512f-pr91157.c.jj 2019-07-16 >12:54:55.928900526 +0200 >+++ gcc/testsuite/gcc.target/i386/avx512f-pr91157.c 2019-07-16 >13:01:39.217714434 +0200 >@@ -0,0 +1,29 @@ >+/* PR tree-optimization/91157 */ >+/* { dg-do run { target { avx512f && lp64 } } } */ >+/* { dg-options "-O2 -mavx512f -fexceptions -fnon-call-exceptions >-fsignaling-nans" } */ >+ >+#include "avx512f-helper.h" >+ >+typedef long double V __attribute__ ((vector_size (4 * sizeof (long >double)))); >+typedef __int128 W __attribute__ ((vector_size (4 * sizeof >(__int128)))); >+ >+__attribute__((noipa)) W >+foo (V x) >+{ >+ return x == 0; >+} >+ >+static void >+test_512 (void) >+{ >+ V a = { 5.0L, 0.0L, -0.0L, -17.0L }; >+ V b = { -0.0L, 16.0L, 0.0L, 18.0L }; >+ V c = { 6.0L, 7.0L, 8.0L, 0.0L }; >+ W ar = foo (a); >+ W br = foo (b); >+ W cr = foo (c); >+ if (ar[0] != 0 || ar[1] != -1 || ar[2] != -1 || ar[3] != 0 >+ || br[0] != -1 || br[1] != 0 || br[2] != -1 || br[3] != 0 >+ || cr[0] != 0 || cr[1] != 0 || cr[2] != 0 || cr[3] != -1) >+ __builtin_abort (); >+} >--- gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c.jj 2019-07-16 >12:55:11.609659992 +0200 >+++ gcc/testsuite/gcc.target/i386/avx512bw-pr91157.c 2019-07-16 >13:01:10.438155882 +0200 >@@ -0,0 +1,6 @@ >+/* PR tree-optimization/91157 */ >+/* { dg-do run { target { avx512bw && lp64 } } } */ >+/* { dg-options "-O2 -mavx512bw -fexceptions -fnon-call-exceptions >-fsignaling-nans" } */ >+ >+#define AVX512BW >+#include "avx512f-pr91157.c" > > Jakub