On Thu, 5 Jan 2017, Jakub Jelinek wrote: > Hi! > > As mentioned in the PR, while vectorizable_comparison has code to deal > with comparison of 2 booleans by transorming those into one or two > BIT_*_EXPR operations that work equally well on normal vectors as well > as the AVX512 bitset masks, vectorizable_comparison lacks that and we > ICE during expansion because of that. > The following patch teaches vectorizable_condition to do that too. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Richard. > 2017-01-05 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/78938 > * tree-vect-stmts.c (vectorizable_condition): For non-masked COND_EXPR > where comp_vectype is VECTOR_BOOLEAN_TYPE_P, use > BIT_{NOT,XOR,AND,IOR}_EXPR on the comparison operands instead of > {EQ,NE,GE,GT,LE,LT}_EXPR directly inside of VEC_COND_EXPR. Formatting > fixes. > > * gcc.dg/vect/pr78938.c: New test. > > --- gcc/tree-vect-stmts.c.jj 2017-01-01 12:45:39.000000000 +0100 > +++ gcc/tree-vect-stmts.c 2017-01-05 15:54:41.075218409 +0100 > @@ -7731,7 +7731,8 @@ vectorizable_condition (gimple *stmt, gi > { > tree scalar_dest = NULL_TREE; > tree vec_dest = NULL_TREE; > - tree cond_expr, then_clause, else_clause; > + tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; > + tree then_clause, else_clause; > stmt_vec_info stmt_info = vinfo_for_stmt (stmt); > tree comp_vectype = NULL_TREE; > tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; > @@ -7741,7 +7742,7 @@ vectorizable_condition (gimple *stmt, gi > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); > enum vect_def_type dt, dts[4]; > int ncopies; > - enum tree_code code; > + enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; > stmt_vec_info prev_stmt_info = NULL; > int i, j; > bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); > @@ -7825,11 +7826,76 @@ vectorizable_condition (gimple *stmt, gi > if (vec_cmp_type == NULL_TREE) > return false; > > + cond_code = TREE_CODE (cond_expr); > + if (!masked) > + { > + cond_expr0 = TREE_OPERAND (cond_expr, 0); > + cond_expr1 = TREE_OPERAND (cond_expr, 1); > + } > + > + if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) > + { > + /* Boolean values may have another representation in vectors > + and therefore we prefer bit operations over comparison for > + them (which also works for scalar masks). We store opcodes > + to use in bitop1 and bitop2. Statement is vectorized as > + BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) > + depending on bitop1 and bitop2 arity. */ > + switch (cond_code) > + { > + case GT_EXPR: > + bitop1 = BIT_NOT_EXPR; > + bitop2 = BIT_AND_EXPR; > + break; > + case GE_EXPR: > + bitop1 = BIT_NOT_EXPR; > + bitop2 = BIT_IOR_EXPR; > + break; > + case LT_EXPR: > + bitop1 = BIT_NOT_EXPR; > + bitop2 = BIT_AND_EXPR; > + std::swap (cond_expr0, cond_expr1); > + break; > + case LE_EXPR: > + bitop1 = BIT_NOT_EXPR; > + bitop2 = BIT_IOR_EXPR; > + std::swap (cond_expr0, cond_expr1); > + break; > + case NE_EXPR: > + bitop1 = BIT_XOR_EXPR; > + break; > + case EQ_EXPR: > + bitop1 = BIT_XOR_EXPR; > + bitop2 = BIT_NOT_EXPR; > + break; > + default: > + return false; > + } > + cond_code = SSA_NAME; > + } > + > if (!vec_stmt) > { > STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; > + if (bitop1 != NOP_EXPR) > + { > + machine_mode mode = TYPE_MODE (comp_vectype); > + optab optab; > + > + optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); > + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) > + return false; > + > + if (bitop2 != NOP_EXPR) > + { > + optab = optab_for_tree_code (bitop2, comp_vectype, > + optab_default); > + if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing) > + return false; > + } > + } > return expand_vec_cond_expr_p (vectype, comp_vectype, > - TREE_CODE (cond_expr)); > + cond_code); > } > > /* Transform. */ > @@ -7858,11 +7924,11 @@ vectorizable_condition (gimple *stmt, gi > auto_vec<vec<tree>, 4> vec_defs; > > if (masked) > - ops.safe_push (cond_expr); > + ops.safe_push (cond_expr); > else > { > - ops.safe_push (TREE_OPERAND (cond_expr, 0)); > - ops.safe_push (TREE_OPERAND (cond_expr, 1)); > + ops.safe_push (cond_expr0); > + ops.safe_push (cond_expr1); > } > ops.safe_push (then_clause); > ops.safe_push (else_clause); > @@ -7886,17 +7952,15 @@ vectorizable_condition (gimple *stmt, gi > } > else > { > - vec_cond_lhs = > - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), > - stmt, comp_vectype); > - vect_is_simple_use (TREE_OPERAND (cond_expr, 0), > - loop_vinfo, >emp, &dts[0]); > - > - vec_cond_rhs = > - vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), > - stmt, comp_vectype); > - vect_is_simple_use (TREE_OPERAND (cond_expr, 1), > - loop_vinfo, >emp, &dts[1]); > + vec_cond_lhs > + = vect_get_vec_def_for_operand (cond_expr0, > + stmt, comp_vectype); > + vect_is_simple_use (cond_expr0, loop_vinfo, >emp, &dts[0]); > + > + vec_cond_rhs > + = vect_get_vec_def_for_operand (cond_expr1, > + stmt, comp_vectype); > + vect_is_simple_use (cond_expr1, loop_vinfo, >emp, &dts[1]); > } > if (reduc_index == 1) > vec_then_clause = reduc_def; > @@ -7953,8 +8017,37 @@ vectorizable_condition (gimple *stmt, gi > else > { > vec_cond_rhs = vec_oprnds1[i]; > - vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type, > - vec_cond_lhs, vec_cond_rhs); > + if (bitop1 == NOP_EXPR) > + vec_compare = build2 (cond_code, vec_cmp_type, > + vec_cond_lhs, vec_cond_rhs); > + else > + { > + new_temp = make_ssa_name (vec_cmp_type); > + if (bitop1 == BIT_NOT_EXPR) > + new_stmt = gimple_build_assign (new_temp, bitop1, > + vec_cond_rhs); > + else > + new_stmt > + = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, > + vec_cond_rhs); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + if (bitop2 == NOP_EXPR) > + vec_compare = new_temp; > + else if (bitop2 == BIT_NOT_EXPR) > + { > + /* Instead of doing ~x ? y : z do x ? z : y. */ > + vec_compare = new_temp; > + std::swap (vec_then_clause, vec_else_clause); > + } > + else > + { > + vec_compare = make_ssa_name (vec_cmp_type); > + new_stmt > + = gimple_build_assign (vec_compare, bitop2, > + vec_cond_lhs, new_temp); > + vect_finish_stmt_generation (stmt, new_stmt, gsi); > + } > + } > } > new_temp = make_ssa_name (vec_dest); > new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, > --- gcc/testsuite/gcc.dg/vect/pr78938.c.jj 2017-01-05 16:06:37.854958230 > +0100 > +++ gcc/testsuite/gcc.dg/vect/pr78938.c 2017-01-05 16:07:04.770609976 > +0100 > @@ -0,0 +1,18 @@ > +/* PR tree-optimization/78938 */ > +/* { dg-do compile } */ > +/* { dg-additional-options "-mavx512bw" { target i?86-*-* x86_64-*-* } } */ > + > +short int v; > + > +int > +foo (char x, int *b) > +{ > + int a = 1; > + for (; x < 1; x++) > + { > + int c = *b != v; > + int d = x != 0; > + a = c == d ? 2 : 0; > + } > + return a; > +} > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)